[med-svn] [r-cran-adegenet] 02/05: New upstream version 2.1.0
Andreas Tille
tille at debian.org
Sun Oct 22 17:39:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-adegenet.
commit 456cca3cf78270fefc53db74eafbe82fb49ea907
Author: Andreas Tille <tille at debian.org>
Date: Sun Oct 22 19:33:03 2017 +0200
New upstream version 2.1.0
---
ChangeLog | 46 +++
DESCRIPTION | 39 +--
MD5 | 200 ++++++-----
NAMESPACE | 36 +-
R/AIC.snapclust.R | 23 ++
R/AICc.snapclust.R | 37 ++
R/BIC.snapclust.R | 25 ++
R/KIC.snapclust.R | 36 ++
R/SNPbin.R | 88 ++++-
R/adegenet.package.R | 86 +++--
R/auxil.R | 37 +-
R/basicMethods.R | 28 +-
R/chooseCN.R | 28 +-
R/compoplot.R | 199 +++++++++++
R/constructors.R | 8 +-
R/dapc.R | 301 ++++++----------
R/datasets.R | 36 ++
R/dist.genlight.R | 33 ++
R/dist.genpop.R | 2 +-
R/doc_C_routines.R | 15 +
R/export_to_mvmapper.R | 271 +++++++++++++++
R/find.clust.R | 130 +++++--
R/gengraph.R | 4 +-
R/glFunctions.R | 12 +-
R/glHandle.R | 9 +-
R/{spca.rtests.R => global_local_tests.R} | 0
R/handling.R | 10 +-
R/haploGen.R | 4 +-
R/import.R | 153 +++++----
R/inbreeding.R | 49 +--
R/monmonier.R | 4 +-
R/seqTrack.R | 2 +-
R/showmekittens.R | 45 +++
R/snapclust.R | 465 +++++++++++++++++++++++++
R/snapclust.choose.k.R | 107 ++++++
R/snpposi.R | 7 +-
R/spca.R | 550 ++++++++++++++++++++----------
R/spca_randtest.R | 92 +++++
R/xvalDapc.R | 29 +-
R/zzz.R | 11 +-
README.md | 6 +
data/H3N2.rda | Bin 45216 -> 45128 bytes
data/dapcIllus.rda | Bin 38831 -> 38791 bytes
data/datalist | 1 +
data/eHGDP.rda | Bin 1126695 -> 1126522 bytes
data/hybridtoy.RData | Bin 0 -> 2723 bytes
data/microbov.rda | Bin 28194 -> 28203 bytes
data/nancycats.rda | Bin 4889 -> 4833 bytes
data/rupica.RData | Bin 0 -> 34264 bytes
data/rupica.rda | Bin 30800 -> 0 bytes
data/sim2pop.rda | Bin 6451 -> 6432 bytes
data/spcaIllus.rda | Bin 16449 -> 16388 bytes
man/AIC.snapclust.Rd | 23 ++
man/AICc.Rd | 26 ++
man/BIC.snapclust.Rd | 23 ++
man/H3N2.Rd | 5 +-
man/Hs.Rd | 7 +-
man/Hs.test.Rd | 7 +-
man/KIC.Rd | 26 ++
man/adegenet.package.Rd | 79 +++--
man/auxil.Rd | 17 +-
man/chooseCN.Rd | 11 +-
man/compoplot.Rd | 74 ++++
man/dapcGraphics.Rd | 37 +-
man/dapcIllus.Rd | 7 +-
man/df2genind.Rd | 42 +--
man/doc_C_routines.Rd | 26 ++
man/eHGDP.Rd | 1 -
man/export_to_mvmapper.Rd | 122 +++++++
man/fasta2DNAbin.Rd | 6 +-
man/find.clusters.Rd | 230 +++++++------
man/genind2df.Rd | 7 +-
man/genind2genpop.Rd | 7 +-
man/genlight.Rd | 8 +
man/glAux.Rd | 2 +-
man/glPca.Rd | 2 +-
man/hierarchy-methods.Rd | 7 +-
man/hybridize.Rd | 7 +-
man/hybridtoy.Rd | 42 +++
man/import2genind.Rd | 7 +-
man/makefreq.Rd | 7 +-
man/microbov.Rd | 1 -
man/minorAllele.Rd | 7 +-
man/nancycats.Rd | 1 -
man/new.genind.Rd | 5 +-
man/new.genpop.Rd | 5 +-
man/old2new.Rd | 3 +-
man/population-methods.Rd | 1 -
man/read.PLINK.Rd | 9 +-
man/read.fstat.Rd | 7 +-
man/read.genepop.Rd | 7 +-
man/read.genetix.Rd | 7 +-
man/read.snp.Rd | 11 +-
man/read.structure.Rd | 7 +-
man/repool.Rd | 7 +-
man/rupica.Rd | 1 -
man/scaleGen.Rd | 5 +-
man/seploc.Rd | 2 +-
man/showmekittens.Rd | 25 ++
man/sim2pop.Rd | 7 +-
man/snapclust.Rd | 140 ++++++++
man/snapclust.choose.k.Rd | 33 ++
man/spca.Rd | 167 ++++++---
man/spcaIllus.Rd | 7 +-
man/spca_randtest.Rd | 51 +++
man/strata-methods.Rd | 31 +-
man/tab.Rd | 1 -
man/web.Rd | 5 +-
man/xvalDapc.Rd | 12 +-
src/init.c | 32 ++
tests/testthat/test-findclust.R | 27 ++
tests/testthat/test_accessors.R | 26 ++
tests/testthat/test_compoplot.R | 28 ++
tests/testthat/test_genlight.R | 2 +-
tests/testthat/test_haploGen.R | 5 +
tests/testthat/test_import.R | 180 +++++++---
tests/testthat/test_snapclust.R | 62 ++++
117 files changed, 3930 insertions(+), 1126 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5fc0771..f7838b8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,49 @@
+
+
+ CHANGES IN ADEGENET VERSION 2.1.0
+NEW FEATURES
+ - new set of functions implementing 'snapclust' for genetic
+ clustering using fast maximum-likelihood estimation; new functions
+ include: `snapclust`, `snapclust.choose.k`, and methods for `AIC`,
+ `AICc`, `BIC`, `KIC`
+
+ - compoplot is now a generic with methods for different objects,
+ including dapc and snapclust
+
+ - spca is now a generic with methods for genind, data.frame and
+ genlight objects
+
+ - a new generic wrapper `export_to_mvmapper` has been added, which
+ exports multivariate analyses (e.g. DAPC, sPCA, or standard dudi
+ objects) with geo-referenced units to serve as input to mvmapper
+ (https://popphylotools.github.io/mvMapper/)
+
+BUG FIXES
+ - fixed registration of C routines which could cause problems with
+ SNPbin and genlight objects.
+
+ - fixed issues relating to `read.PLINK` and the reading of `.map`
+ files (issues 94 and 188)
+
+ - fixed issues relating xvalDapc with non-table objects by registering
+ methods for genind and genlight object (issue 193)
+
+ - find.clusters with clust parameter will no longer throw an error
+ when attempting to find clusters of populations with less than
+ n/10 individuals (issue 184)
+
+
+
+ CHANGES IN ADEGENET VERSION 2.0.2
+BUG FIXES
+ o df2genind will now replace "." in alleles with "_" (or "p" if sep = "_")
+ see https://github.com/thibautjombart/adegenet/issues/132 for details
+
+ o if several locus names would partially match through grep,
+ df2genind would output an incorrect genind object
+ NA-wise. Reported by Elizabeth, see
+ https://github.com/thibautjombart/adegenet/issues/160
+
CHANGES IN ADEGENET VERSION 2.0.1
NEW FEATURES
o Hs is now much faster and will scale better for large number of
diff --git a/DESCRIPTION b/DESCRIPTION
index 2dd559b..e8b596e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,19 +1,17 @@
Package: adegenet
-Version: 2.0.1
+Version: 2.1.0
Encoding: UTF-8
-Date: 2016-02-15
Title: Exploratory Analysis of Genetic and Genomic Data
-Author: Thibaut Jombart, Zhian N. Kamvar, Roman Lustrik, Caitlin Collins, Marie-
- Pauline Beugin, Brian Knaus, Peter Solymos, Klaus Schliep, Ismail Ahmed, Anne
- Cori, Federico Calboli
+Author: Thibaut Jombart, Zhian N. Kamvar, Caitlin Collins, Roman Lustrik, Marie-
+ Pauline Beugin, Brian J. Knaus, Peter Solymos, Vladimir Mikryukov, Klaus Schliep, Tiago Maié, Libor Morkovsky, Ismail Ahmed, Anne Cori, Federico Calboli, RJ Ewing
Maintainer: Thibaut Jombart <thibautjombart at gmail.com>
-Suggests: pegas, hierfstat, akima, maps, splancs, adehabitat, tripack,
- testthat
+Suggests: pegas, hierfstat, akima, maps, splancs, tripack, testthat,
+ poppr
Depends: R (>= 2.14), methods, ade4
Imports: utils, stats, grDevices, MASS, igraph, ape, shiny, ggplot2,
seqinr, parallel, spdep, boot, reshape2, dplyr (>= 0.4.1),
vegan
-URL: http://adegenet.r-forge.r-project.org/
+URL: https://github.com/thibautjombart/adegenet
Description: Toolset for the exploration of genetic and genomic data. Adegenet
provides formal (S4) classes for storing and handling various genetic data,
including genetic markers with varying ploidy and hierarchical population
@@ -22,22 +20,25 @@ Description: Toolset for the exploration of genetic and genomic data. Adegenet
methods (DAPC, sPCA), graphics, statistical tests, simulation tools, distance
and similarity measures, and several spatial methods. A range of both empirical
and simulated datasets is also provided to illustrate various methods.
-Collate: adegenet.package.R datasets.R orthobasis.R classes.R
- constructors.R accessors.R basicMethods.R handling.R auxil.R
- minorAllele.R setAs.R SNPbin.R strataMethods.R
+Collate: adegenet.package.R datasets.R dist.genlight.R orthobasis.R
+ classes.R constructors.R accessors.R basicMethods.R handling.R
+ auxil.R minorAllele.R setAs.R SNPbin.R strataMethods.R
hierarchyMethods.R glHandle.R glFunctions.R glSim.R
find.clust.R hybridize.R scale.R fstat.R import.R seqTrack.R
chooseCN.R genind2genpop.R loadingplot.R sequences.R
gstat.randtest.R makefreq.R colorplot.R monmonier.R spca.R
- coords.monmonier.R haploGen.R old2new.R spca.rtests.R dapc.R
- xvalDapc.R haploPop.R PCtest.R dist.genpop.R Hs.R propShared.R
- export.R HWE.R propTyped.R inbreeding.R glPlot.R gengraph.R
- simOutbreak.R mutations.R snpposi.R snpzip.R pairDist.R
- servers.R zzz.R
+ coords.monmonier.R haploGen.R old2new.R global_local_tests.R
+ dapc.R compoplot.R xvalDapc.R haploPop.R PCtest.R dist.genpop.R
+ Hs.R propShared.R export.R HWE.R propTyped.R inbreeding.R
+ glPlot.R gengraph.R simOutbreak.R mutations.R snpposi.R
+ snpzip.R pairDist.R snapclust.R AIC.snapclust.R
+ AICc.snapclust.R BIC.snapclust.R KIC.snapclust.R
+ snapclust.choose.k.R servers.R showmekittens.R spca_randtest.R
+ export_to_mvmapper.R doc_C_routines.R zzz.R
License: GPL (>= 2)
LazyLoad: yes
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
NeedsCompilation: yes
-Packaged: 2016-02-15 13:36:45 UTC; thibaut
+Packaged: 2017-10-12 15:31:47 UTC; thibaut
Repository: CRAN
-Date/Publication: 2016-02-15 16:12:41
+Date/Publication: 2017-10-12 19:20:52 UTC
diff --git a/MD5 b/MD5
index 1a976ee..814aacf 100644
--- a/MD5
+++ b/MD5
@@ -1,43 +1,52 @@
-043721630a5ac2124992db79e1f647f3 *ChangeLog
-490d91197ac90da3adc38283a67a6ffe *DESCRIPTION
-93fa70ebd003ab02e4db0ac142fc95bb *NAMESPACE
+3a05103a84c00117f95e39500c135bb0 *ChangeLog
+d5d9b279de52d5c84ac9c180acbac1d0 *DESCRIPTION
+d920e1db3e15f61a379b8c794042e067 *NAMESPACE
+5ebf3ab120b8dc51059621f42255e069 *R/AIC.snapclust.R
+957a38141833031d3003f04796430466 *R/AICc.snapclust.R
+ebcd402640f7606dc419654c6293fefa *R/BIC.snapclust.R
872c8ca388f1bb2990513034e17011d9 *R/HWE.R
e15a6bd7d43060f74695502fa53bf86c *R/Hs.R
+3dc4032b28c23600a13e55a9bcf03d86 *R/KIC.snapclust.R
33a612fe584815666b9bc9082557ce1a *R/PCtest.R
-fa3f57d77ca91c708e0cafdc97efe706 *R/SNPbin.R
+703951a322d7ccf91f261dfe2cb6728a *R/SNPbin.R
79b7bbe04137964062fac7a0a6374ec5 *R/accessors.R
-9ea1ac7dac4274163e29ce4e1c2da8d4 *R/adegenet.package.R
-1970c05c3fa87d15bf1dd09e4e81c9c4 *R/auxil.R
-683b7fe45ecc7dfd78ebbc3a7457838c *R/basicMethods.R
-968c4ccae2b61eb3030d2585764d7a79 *R/chooseCN.R
+013734a04306216c776620c40bdf1432 *R/adegenet.package.R
+e849faa072c9ea3afcac13609e2ccd42 *R/auxil.R
+e81a03224dde9b18ce9025ac19125a9b *R/basicMethods.R
+704b961c6c236b785a392c30ef9dc133 *R/chooseCN.R
2dfb172b9f06057918208e5b6faf92ac *R/classes.R
cd17d08e9dd4dd2d6519d25d776a3289 *R/colorplot.R
-6e38af071481fd9eb6e90624997dbd7b *R/constructors.R
+c8fdac27765f30202db6fe0f6668c89a *R/compoplot.R
+933a07671b9788b66fbde6cde7d96053 *R/constructors.R
ab1e2184b59ca229cb476243ff1ceeea *R/coords.monmonier.R
-0823d8f3c39347833650756e631b71a0 *R/dapc.R
-c2513b25956b3f55e5acffe859c2abf8 *R/datasets.R
-cc47de54cf34d30002f211629a1f783d *R/dist.genpop.R
+b208adcfceb7c9a3097799ea5b7a8f48 *R/dapc.R
+973d56774c8a13f4c176d0c136f195e1 *R/datasets.R
+77646de56b068166ce59aeb7420e3a94 *R/dist.genlight.R
+852a9004af860fd07866df62ecfaebe4 *R/dist.genpop.R
+c31c91412be03aac5f39c910b20ccd81 *R/doc_C_routines.R
e9927969f66c1fb7749abc774923582a *R/export.R
-e0fb6aef1e10f63dc9f5559cd63e22fb *R/find.clust.R
+f629a38c92bf046a870023125bd8601f *R/export_to_mvmapper.R
+0a8443644964703358036eab9c2b2847 *R/find.clust.R
f05ecfcf1dfd3285337e364710cf80b0 *R/fstat.R
-c005f8e5858c051934d3f06bbc87d1f8 *R/gengraph.R
+53f9c909634adaace0d97a939455ddb1 *R/gengraph.R
75f9e99579b6b7be57ee2cedd99fada8 *R/genind2genpop.R
-8c9dc746aedf71fd5429232ea1209020 *R/glFunctions.R
-219dbaca187c2bd9237708bd760350ce *R/glHandle.R
+aec2df6a8d13f9b3e5dbbef5858adb75 *R/glFunctions.R
+d8adc34b3134b72786bdc4fec74595dd *R/glHandle.R
305b56351e4d6f3b2bccb34819f61488 *R/glPlot.R
f2a8fbea1d1ff39c304637a8f1520c9b *R/glSim.R
+3a0463e7f263ce83369077c06cf0e8b0 *R/global_local_tests.R
0d2123ab0b7f862bdaa7f5b87e3a884d *R/gstat.randtest.R
-1c1ba8e62592e3fdd22866d20c631267 *R/handling.R
-559969fc2e89d60eac744d96c47c8823 *R/haploGen.R
+32c97c56d40c0a1017dd5348a2242778 *R/handling.R
+54d8c959a1e95218dc5f7378105746d1 *R/haploGen.R
90c2d9985882a2b4108e89be08d2d1ec *R/haploPop.R
0ac4bc04ff3a7ce8835ca77246c78cf4 *R/hierarchyMethods.R
4b5ca941081b19b9fe6bd7fce3aa962b *R/hybridize.R
-d263fc2bc5323249806bbc1747e48779 *R/import.R
-c829ae89a4886b6c733038ff6dbf52f3 *R/inbreeding.R
+2ed751df92eeeead98476d0919dde0cb *R/import.R
+1bd985d69b3cff1a1cea29b30da8f4b9 *R/inbreeding.R
29ab3ad7f85c0f0b436ef82f85e9d63b *R/loadingplot.R
c87829e43be8d3a38526c41928ed7740 *R/makefreq.R
ddddfa12454fd2150f177d9fdf7d6a55 *R/minorAllele.R
-43874f6f46dc94280a88168ca55cef10 *R/monmonier.R
+bacf07301534f4d3c33bed8b481814c5 *R/monmonier.R
72e2e1dfaf7c39b93ff3a2b831c06e51 *R/mutations.R
132c366b1550c82d26d93d9d76f17060 *R/old2new.R
d2dac63f521fc485890d7b6efc338da0 *R/orthobasis.R
@@ -45,28 +54,32 @@ c8cfc80579ea27e5f4babb20aa1c0754 *R/pairDist.R
3d7bfc72bb7585a40c27d8431eb748e2 *R/propShared.R
ce8cdf2b4947ff9f56e15c957abd1074 *R/propTyped.R
7f7d55e100c8a308171dab893f8827f6 *R/scale.R
-618e1e89602600d8d8ecb499d0ad6b2d *R/seqTrack.R
+74c644932aba729c7f0bae2d0cfc10f0 *R/seqTrack.R
f836bd30520f0c673d8635c0ff3b3659 *R/sequences.R
c8067fab3348da179bbc350cece12460 *R/servers.R
c46e97c8ebd6dbc1bae511f6ffb4fc83 *R/setAs.R
+7d75bff5d807b6eeb51f5f6a3446be1d *R/showmekittens.R
684cd5c34cbda7c5360e992401df68f9 *R/simOutbreak.R
-715b1b79c0986055d7baa5733591a537 *R/snpposi.R
+938360215056562f20ee829b9507a445 *R/snapclust.R
+1a50a9b53cc8ae07b3f0ce05901366cb *R/snapclust.choose.k.R
+53badb21d2c40983e5bf31bb125a811f *R/snpposi.R
2d172fbdead950ca125a09e7db041c16 *R/snpzip.R
-dbc307faed68a386c76b55de6345c2c3 *R/spca.R
-3a0463e7f263ce83369077c06cf0e8b0 *R/spca.rtests.R
+7d787254b33643316233c5b81494c1f8 *R/spca.R
+f7934b565f648cf6772e8e9b760f6184 *R/spca_randtest.R
1b132461c8acb40e2ba2980f8736a189 *R/strataMethods.R
-3d0e17e59b2ca9b70a69040c2969d84f *R/xvalDapc.R
-c07833eb4730f890e359abb371631659 *R/zzz.R
-77251ecc14a7ecb0e09c83dc09cfe276 *README.md
-b4624d4cde721ed59922a7eb1af8c1bf *data/H3N2.rda
-629798f46f4a4e6f76a539400b0b790e *data/dapcIllus.rda
-7aa59343958d1c0c2f8a071925a5fd4e *data/datalist
-fc085fa07c539b7a2ea4569e3ac526e8 *data/eHGDP.rda
-524b60d998fc74277bb6a0afab6d19ee *data/microbov.rda
-48490eaf36b0da59e8358adcfcb66794 *data/nancycats.rda
-73ee871ca0c486684a1ef20b265d4abd *data/rupica.rda
-93867d456643e7b80619a2c810b74f70 *data/sim2pop.rda
-c34bcb64bd432c7eb0e0a8235bf5f329 *data/spcaIllus.rda
+3b99f8a7484d38bfd7998bacbf752151 *R/xvalDapc.R
+eef5358b5b6fdc1bd80de199abf78be1 *R/zzz.R
+25c42c62fadf7c9c2dc7a1e24de4f19b *README.md
+8b0ef241ce04d41269e7c6294e52979f *data/H3N2.rda
+eb59241a255beb173411fb6038398905 *data/dapcIllus.rda
+8ce7778cd2ac39c47231a1186e66dd2f *data/datalist
+f6f99d2e60db6aa3739b582589d3e342 *data/eHGDP.rda
+c2092ae7faff469f321e5c4af00aea0e *data/hybridtoy.RData
+07a10a6c006e674e26ee0640a2bf2fc0 *data/microbov.rda
+05d7e9d00e14c3fd214d592b165dc39f *data/nancycats.rda
+5918524c6abb5ae3d2103e162de3b530 *data/rupica.RData
+dbc9da6e17ac338c311dff5f930be354 *data/sim2pop.rda
+40f1f88109829756a94ab07b8b949239 *data/spcaIllus.rda
66f7dc09e53d6bc383bdf5e6ae0b6a1a *inst/CITATION
3195d9e41d03a862de02a0049c436cd4 *inst/dapcServer/server.R
f4ad07cc4532088da6a4749c1e766cd8 *inst/dapcServer/ui.R
@@ -82,104 +95,121 @@ b071452ae7320a21b0709f5533275504 *inst/files/pdH1N1-HA.fasta
a03a7f02d49aded8184a4e8cc8a55b55 *inst/files/pdH1N1-NA.fasta
4d54bb83efe0c672869028631a51fea0 *inst/files/pdH1N1-data.csv
7a5e71b7e9963effe18c1e17562f43e9 *inst/files/usflu.fasta
-81221ea02310cbf1ce476fa57625a8bf *man/H3N2.Rd
+12f7f2e1b045b1bed12ae54c760b1c75 *man/AIC.snapclust.Rd
+9df7d858d63361e0d2e6fc62e562fb76 *man/AICc.Rd
+8ebef7c898cba944052d3f75147ddc4f *man/BIC.snapclust.Rd
+b76117bd2242640da5e1baed5d45fa88 *man/H3N2.Rd
f801d68f4276b8305742d7a41110754e *man/HWE.Rd
-ca2a6d94a3db651863cba1e2d6ba359b *man/Hs.Rd
-63840a619ec1311dabc8dc55bfc4db2d *man/Hs.test.Rd
+2f63cf0ec193a67ff35af9d60a0a0f49 *man/Hs.Rd
+0fb8e0345704f613d846c0fe66e28d08 *man/Hs.test.Rd
+e15c29481a4c8c8f26930eaac64ad229 *man/KIC.Rd
4824fe0317d5c93cb6bfae831e7138e4 *man/SNPbin.Rd
2007cebc554242eeac6335394c239988 *man/accessors.Rd
-2cb9102e58ba72aae2920bad4886d0ef *man/adegenet.package.Rd
+9d2b38f45151291716569f24e3f9736b *man/adegenet.package.Rd
3655909f5122cfa1318df7c38631e9d7 *man/as-methods.Rd
f7797181705e63eb7e49617c382190d8 *man/as.SNPbin.Rd
1300e25b1e6dd3b319d2e5e5b6c9e5c9 *man/as.genlight.Rd
4356bad7f32cee5d0fd3c6507de19299 *man/ascore.Rd
-417addf58817d9b21885ab4894c64246 *man/auxil.Rd
-0d60ca3306ef3048d85183f8ad70c30c *man/chooseCN.Rd
+603f7ed59afb9ba8bd665968ca23504c *man/auxil.Rd
+3543abf35c17626a3607b71c72c9b642 *man/chooseCN.Rd
565439f76e58a83e842df3a5fb5cf497 *man/colorplot.Rd
+4da5bf5209c31cf8adb984499c0fc021 *man/compoplot.Rd
5dadcdda5c96dfad346e1c98fac14afe *man/coords.monmonier.Rd
10142abd4e529f162de66960893b9506 *man/dapc.Rd
-fff09e21c78cad2a9084b50f98abf843 *man/dapcGraphics.Rd
-8290ee86a85e5bd49ac7a72c806341c4 *man/dapcIllus.Rd
-040c7d5e0f79c782e6fd0a661057c8cd *man/df2genind.Rd
+157103133a3b2b7e9f4b7a169e91c0cf *man/dapcGraphics.Rd
+8cf337df6e71b4877831ef78b874202e *man/dapcIllus.Rd
+5c3511c8ee6ba78aae91ea4dc550153e *man/df2genind.Rd
60d761d726d79674e12aa5266d238b29 *man/dist.genpop.Rd
-5c87af0ee059eb2751217e2835a89c2c *man/eHGDP.Rd
-4dcf93a3ecfd8e4e784848d6e29f414f *man/fasta2DNAbin.Rd
+3ff1eb496a3aecfc64e628f1500cb519 *man/doc_C_routines.Rd
+4ba4682a5311891bf81365e99947349a *man/eHGDP.Rd
+0f8ec253419e3407b268919d2dba3c82 *man/export_to_mvmapper.Rd
+6a5c68da68b27901931b2bbe468a3be5 *man/fasta2DNAbin.Rd
bc118c54e88934eac5e6e84ae0b16c10 *man/fasta2genlight.Rd
-5cac58fad12ae61ecb75b687f24e045e *man/find.clusters.Rd
+048b2d9dc8e789539d30b496e2f2248c *man/find.clusters.Rd
effd15da9c3eb41873b1bc143a61106b *man/gengraph.Rd
6da496e65c8612abff30e93cdb399dde *man/genind.Rd
-4943ce62e0ad7add513fc4c67642b0ab *man/genind2df.Rd
-87fa9ad7f5771b7e866092d3d169fb5a *man/genind2genpop.Rd
-54a4f8221d5bf2f472af305063ff936d *man/genlight.Rd
+92c035ba7f3a159ecb6a6b71421594de *man/genind2df.Rd
+1858c8f6ee5d6ff1097725719d6cbcba *man/genind2genpop.Rd
+7538c86b74a61be0b8fefbc49a3c6a20 *man/genlight.Rd
24158e177d5c9947dc68ba0404dc7a12 *man/genpop.Rd
-9d9c94228d93b54e4266bce851b72de5 *man/glAux.Rd
-e103184c7c9ca494e0225fa1b47d5f75 *man/glPca.Rd
+2b7ada9b0143e152b814ff71391f82f4 *man/glAux.Rd
+0f8d5a18af1f672dcfde953acbbe8df6 *man/glPca.Rd
86f001c538892609caf73ae32b8a09d4 *man/glPlot.Rd
bd35b2e6699e062e6c0c974bbc721769 *man/glSim.Rd
dcf52612726ba98e96b2d34d5c686138 *man/haploGen.Rd
-60252735a545aeecebe76b5ffc8ee6a8 *man/hierarchy-methods.Rd
-d40e74e80a98b24ac8e9b2210c39b9fe *man/hybridize.Rd
-d119b0ed77d8a558ea475f926a1201f9 *man/import2genind.Rd
+3957a1a5cbe1cb8ae26ae64fdb0588cd *man/hierarchy-methods.Rd
+d4c6e5c9f82d92258b6122dc92568b8c *man/hybridize.Rd
+6660abf36ebc607adb5f9dc4750eb7f1 *man/hybridtoy.Rd
+b7ebb10ed1bafdc8f81df936ca35b795 *man/import2genind.Rd
353eb2a9cf1a255cc11a4039e15e64a8 *man/inbreeding.Rd
883de57c061c78d2fcdef931d3c238bd *man/isPoly.Rd
36bbe66ad58c913523864910f83078cd *man/loadingplot.Rd
-4d443d3bcb01d83d24be67647430fdca *man/makefreq.Rd
-1112c7892ff82c0fa6cebc69efda3ff6 *man/microbov.Rd
-3d29bcd66a148ceca8c36ce8a925a0fa *man/minorAllele.Rd
+04755ecf8a0bff560c686c41b6b819d0 *man/makefreq.Rd
+f6576754f80b622c74b6ecae10a57987 *man/microbov.Rd
+5ac9b81619dfb53d86504a6a4a1268fb *man/minorAllele.Rd
680500e08a6ce86477fe9d57820dc80d *man/monmonier.Rd
c12df9c827ed9633a19eca5c0b5cf8eb *man/mutations.Rd
-89a79293f5b356383dfad71fa371cdd9 *man/nancycats.Rd
-b957d0956786813cd4c8885c856b59eb *man/new.genind.Rd
-7947a1cea9cf54b0d3e72f0d137279a4 *man/new.genpop.Rd
-87cf329bd0814cbf5a56c343d792f718 *man/old2new.Rd
+87e93da45e0cf46c84e1ca8ad44b9b20 *man/nancycats.Rd
+b62d337495bf0a5e751cfbff67feecbb *man/new.genind.Rd
+d556b38c0248f796d6c1d8e9fdfcf2bf *man/new.genpop.Rd
+066e33567079086c6604e1f7e7bb211b *man/old2new.Rd
c97cf4b6c593ac021696d610acfb9bee *man/pairDist.Rd
-fc28e377a61d5d8cc322ad10c3772aed *man/population-methods.Rd
+3b91f7f91e77717ca360d4e09bf28497 *man/population-methods.Rd
4ba1759cd430dd21d302ee68c1bb5268 *man/propShared.Rd
4bbbe806dc95685a8cdc1df771203347 *man/propTyped.Rd
-4029106603e2674b8d7c9b9f2268a2ed *man/read.PLINK.Rd
-f31a0353cfa2d0efb23a66f8cb5f6a98 *man/read.fstat.Rd
-2a25b77e94a0c3589af2ef42e7198cce *man/read.genepop.Rd
-542cbb02915fdc892cd1323ce7fac1c9 *man/read.genetix.Rd
-a8005b5c48a989d4d2db21522bf3cc6f *man/read.snp.Rd
-d00a71033e9c33a932a82682e877d356 *man/read.structure.Rd
-0e070f2eb4e96e67987caa0062cc369d *man/repool.Rd
-d079ca931df91e9af8f1f4534abc98b2 *man/rupica.Rd
-b966ecd05d7e3c6e702ceb4156dd6a78 *man/scaleGen.Rd
+537fea2bf2780d46639fdbfb09b6bd5b *man/read.PLINK.Rd
+370de6b5e77078307fe24af3ca534eff *man/read.fstat.Rd
+e76cab7735bfdcb6e5ba404d255a6e9a *man/read.genepop.Rd
+58d0f7d0db64539ef7011622ab7f72c5 *man/read.genetix.Rd
+44c455f674f0fb60edf16a226980feee *man/read.snp.Rd
+1d06ab01b69f0124ea4a4a86ae0c03da *man/read.structure.Rd
+40cd398e5e1c99246b1fb4651a46cff0 *man/repool.Rd
+ebadad8d3c41bff22c1f9179b30a7be4 *man/rupica.Rd
+be3904b784639a43289b18c88ed4bf9c *man/scaleGen.Rd
39849f4514f0ad829b7cec2ab9df74eb *man/selpopsize.Rd
-b633e38a6846149db69444308b0b6437 *man/seploc.Rd
+5698f677468fa2beea2d00622c903bdc *man/seploc.Rd
24b3bc34f35e501dae3303929a50d58b *man/seppop.Rd
233b9513b37ba3c94022bde86e8f6e38 *man/seqTrack.Rd
91fd7b6b7e1cebbca9bba3fb3f01cbbe *man/sequences.Rd
06dd5656412bd10552e1351a32177f74 *man/servers.Rd
-39b826570928269b683610e2cec9d9ba *man/sim2pop.Rd
+af52442bf33bab046ff121b0aa1355f2 *man/showmekittens.Rd
+90767402af8bf77cd2feac06a5333fba *man/sim2pop.Rd
+ca5a7bbd30e3e508032653f702bcf550 *man/snapclust.Rd
+6b9afcfa4b16be32241ef7ec33dd8c48 *man/snapclust.choose.k.Rd
8238f9fb5eec829a1978445a5bd5431b *man/snpposi.Rd
a07dc761eaed6ce13e3ba2c1ef1c7949 *man/snpzip.rd
-6395f0c834c0a3d6f219de59a66ef30e *man/spca.Rd
+b8478f2203f480e09bfaaadd2ebc55ae *man/spca.Rd
c657d396cb519d8be6a9eed6848dc2a9 *man/spca.rtests.Rd
-1083eaff78f5f661e960250f340efcea *man/spcaIllus.Rd
-da53207406f7fc948867ad08c1c28eb0 *man/strata-methods.Rd
-c4b1b834ba9aea87ed5d5c4f71121ca4 *man/tab.Rd
+3474eceea6b66ea1bfafc358777570cb *man/spcaIllus.Rd
+4b95bf51a91698da2894d8440b729e59 *man/spca_randtest.Rd
+0c4910974db9c87977b03c11b32fefc4 *man/strata-methods.Rd
+9fc2bf978bbf56b40bb9baa4b064a502 *man/tab.Rd
5b0ca85d0125eeb2c597e88a16a92dde *man/truenames.Rd
2f14395a0ee936db9b13dcdef500b2aa *man/virClasses.Rd
-1e5533cd452dcbd7ee0bdb632ad0b613 *man/web.Rd
-0fdb35d603fd69a78874392573b7d2e2 *man/xvalDapc.Rd
+7167db1d1f0f178749ea1af8bf58e9be *man/web.Rd
+09d7c0499ed779fd0f430d2b9e0027d3 *man/xvalDapc.Rd
3bd8bc67659887935c2085e36fdf723d *src/GLfunctions.c
7fa800842c28da56c0bbcb8c330017d1 *src/GLfunctions.h
1f6a617c93fba4457f732ac048df9aa9 *src/adesub.c
fede1fa0994f4422788d288a681c5c2d *src/adesub.h
+53e256a63374b8c888eb1704b81ed06c *src/init.c
7b131de94b94753b4025ad01e733d887 *src/monmonier-utils.c
30a3776e7b777916198cf651f234dca4 *src/sharedAll.c
8d25c1b6b009f32d9e94ad8a381f5559 *src/snpbin.c
01223918611fabb05f182e35199dc4de *src/snpbin.h
902d68e0a32942936d7bcac7de400bfe *tests/testthat.R
+1e059ba9872ee43edeec99b3482d9baa *tests/testthat/test-findclust.R
01622b18130f09e1b609c9f349a56ceb *tests/testthat/test-prop.R
e552cebb26630d9fe44f7b79f0dbbdfa *tests/testthat/test-seppop.R
-d60457d3c0727d39784a8f9d475f5786 *tests/testthat/test_accessors.R
+34474a1d046f69bb1ae9b1c5af0f9966 *tests/testthat/test_accessors.R
+64c69022f9d492be121322178f080618 *tests/testthat/test_compoplot.R
439335b7a33235df9b82fe46c0ef281d *tests/testthat/test_constructors.R
-71134ec4176bfe49b0edb426c08725e5 *tests/testthat/test_genlight.R
+1443018b5340fd051f09b1448704bbc9 *tests/testthat/test_genlight.R
+c31694f2ca8274b384573c03cad7b985 *tests/testthat/test_haploGen.R
1aaedd2f1b74b37af7579415cf0ff898 *tests/testthat/test_hierarchy.R
-dc171aac72a86ce59e3fdd8cee312f39 *tests/testthat/test_import.R
+130b245531634a5890d7728fb0ef012e *tests/testthat/test_import.R
0339f776dd699f378f07296573af4589 *tests/testthat/test_repool.R
+0093160dba54dcec9d1d7b8e88d4fbf2 *tests/testthat/test_snapclust.R
9c948ede454b03a238d17cb9f2e9efa3 *tests/testthat/test_subset.R
641aa35f562b26c0a2de99a473c212e0 *tests/testthat/test_summary.R
dadd0a55f6ec8ca7953f1e35722ea76e *tests/testthat/test_xval.R
diff --git a/NAMESPACE b/NAMESPACE
index 8db6ef9..6c0e1fa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,10 @@
# Generated by roxygen2: do not edit by hand
+S3method("[",haploGen)
+S3method(AIC,snapclust)
+S3method(AICc,snapclust)
+S3method(BIC,snapclust)
+S3method(KIC,snapclust)
S3method(as.POSIXct,haploGen)
S3method(as.data.frame,genind)
S3method(as.data.frame,genlight)
@@ -11,13 +16,22 @@ S3method(as.matrix,genind)
S3method(as.matrix,genlight)
S3method(as.matrix,genpop)
S3method(c,SNPbin)
+S3method(cbind,SNPbin)
+S3method(cbind,genlight)
S3method(colorplot,default)
S3method(colorplot,spca)
+S3method(compoplot,dapc)
+S3method(compoplot,matrix)
+S3method(compoplot,snapclust)
S3method(dapc,data.frame)
S3method(dapc,dudi)
S3method(dapc,genind)
S3method(dapc,genlight)
S3method(dapc,matrix)
+S3method(export_to_mvmapper,dapc)
+S3method(export_to_mvmapper,default)
+S3method(export_to_mvmapper,dudi)
+S3method(export_to_mvmapper,spca)
S3method(find.clusters,data.frame)
S3method(find.clusters,genind)
S3method(find.clusters,genlight)
@@ -51,6 +65,7 @@ S3method(print,glPca)
S3method(print,haploGen)
S3method(print,monmonier)
S3method(print,spca)
+S3method(rbind,genlight)
S3method(scatter,dapc)
S3method(scatter,glPca)
S3method(screeplot,spca)
@@ -63,8 +78,18 @@ S3method(snpposi.plot,numeric)
S3method(snpposi.test,DNAbin)
S3method(snpposi.test,integer)
S3method(snpposi.test,numeric)
+S3method(spca,data.frame)
+S3method(spca,default)
+S3method(spca,genind)
+S3method(spca,genpop)
+S3method(spca,matrix)
S3method(summary,dapc)
S3method(summary,spca)
+S3method(xvalDapc,data.frame)
+S3method(xvalDapc,default)
+S3method(xvalDapc,genind)
+S3method(xvalDapc,genlight)
+S3method(xvalDapc,matrix)
export("addStrata<-")
export("hier<-")
export("nameStrata<-")
@@ -72,11 +97,14 @@ export("setPop<-")
export("splitStrata<-")
export("strata<-")
export(.genlab)
+export(.internal_C_routines)
export(.readExt)
export(.render.server.info)
export(.rmspaces)
+export(AICc)
export(Hs)
export(Hs.test)
+export(KIC)
export(addStrata)
export(adegenetIssues)
export(adegenetTutorial)
@@ -84,7 +112,9 @@ export(adegenetWeb)
export(as.genind)
export(as.genpop)
export(chooseCN)
+export(compoplot)
export(df2genind)
+export(export_to_mvmapper)
export(extract.PLINKmap)
export(genind)
export(genind2df)
@@ -107,6 +137,10 @@ export(read.structure)
export(repool)
export(scaleGen)
export(setPop)
+export(showmekittens)
+export(snapclust)
+export(snapclust.choose.k)
+export(spca_randtest)
export(splitStrata)
export(strata)
export(tab)
@@ -178,4 +212,4 @@ importFrom(spdep,"nb2listw")
importFrom(spdep,"relativeneigh")
importFrom(spdep,"tri2nb")
importFrom(vegan,orditorp)
-useDynLib(adegenet)
+useDynLib(adegenet, .registration = TRUE)
diff --git a/R/AIC.snapclust.R b/R/AIC.snapclust.R
new file mode 100644
index 0000000..117b8da
--- /dev/null
+++ b/R/AIC.snapclust.R
@@ -0,0 +1,23 @@
+#' Compute Akaike Information Criterion (AIC) for snapclust
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @param object An object returned by the function \code{\link{snapclust}}.
+#'
+#' @param ... Further arguments for compatibility with the \code{AIC} generic
+#' (currently not used).
+#'
+#' @seealso \code{\link{snapclust}} to generate clustering solutions.
+#'
+#'
+AIC.snapclust <- function(object, ...) {
+
+ ## The number of parameters is defined as:
+ ## (number of independent allele frequencies) x (nb clusters).
+
+ -2 * object$ll + 2 * object$n.param
+}
diff --git a/R/AICc.snapclust.R b/R/AICc.snapclust.R
new file mode 100644
index 0000000..21630c3
--- /dev/null
+++ b/R/AICc.snapclust.R
@@ -0,0 +1,37 @@
+#' Compute Akaike Information Criterion for small samples (AICc) for snapclust
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @param object An object returned by the function \code{\link{snapclust}}.
+#'
+#' @param ... Further arguments for compatibility with the \code{AIC} generic
+#' (currently not used).
+#'
+#' @seealso \code{\link{snapclust}} to generate clustering solutions.
+#'
+#' @rdname AICc
+#'
+AICc <- function(object, ...) {
+ UseMethod("AICc", object)
+}
+
+
+
+
+
+#' @export
+#' @aliases AICc.snapclust
+#' @rdname AICc
+AICc.snapclust <- function(object, ...) {
+
+ ## The number of parameters is defined as:
+ ## (number of independent allele frequencies) x (nb clusters).
+ k <- object$n.param
+ n <- length(object$group)
+ -2 * object$ll + (2 * k * n) / (n - k - 1)
+
+}
diff --git a/R/BIC.snapclust.R b/R/BIC.snapclust.R
new file mode 100644
index 0000000..8899339
--- /dev/null
+++ b/R/BIC.snapclust.R
@@ -0,0 +1,25 @@
+#' Compute Bayesian Information Criterion (BIC) for snapclust
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @param object An object returned by the function \code{\link{snapclust}}.
+#'
+#' @param ... Further arguments for compatibility with the \code{BIC} generic
+#' (currently not used).
+#'
+#' @seealso \code{\link{snapclust}} to generate clustering solutions.
+#'
+#'
+BIC.snapclust <- function(object, ...) {
+
+ ## The number of parameters is defined as:
+ ## (number of independent allele frequencies) x (nb clusters).
+
+ n <- length(object$group)
+
+ -2 * object$ll + log(n) * object$n.param
+}
diff --git a/R/KIC.snapclust.R b/R/KIC.snapclust.R
new file mode 100644
index 0000000..183eaba
--- /dev/null
+++ b/R/KIC.snapclust.R
@@ -0,0 +1,36 @@
+#' Compute Akaike Information Criterion for small samples (AICc) for snapclust
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @param object An object returned by the function \code{\link{snapclust}}.
+#'
+#' @param ... Further arguments for compatibility with the \code{AIC} generic
+#' (currently not used).
+#'
+#' @seealso \code{\link{snapclust}} to generate clustering solutions.
+#'
+#' @rdname KIC
+#'
+KIC <- function(object, ...) {
+ UseMethod("KIC", object)
+}
+
+
+
+
+
+#' @export
+#' @aliases KIC.snapclust
+#' @rdname KIC
+KIC.snapclust <- function(object, ...) {
+
+ ## The number of parameters is defined as:
+ ## (number of independent allele frequencies) x (nb clusters).
+ k <- object$n.param
+ -2 * object$ll + 3 * (k + 1)
+
+}
diff --git a/R/SNPbin.R b/R/SNPbin.R
index c6ca07a..7fb3f55 100644
--- a/R/SNPbin.R
+++ b/R/SNPbin.R
@@ -165,7 +165,7 @@ setMethod("initialize", "SNPbin", function(.Object, ...) {
########################
## genlight constructor
########################
-setMethod("initialize", "genlight", function(.Object, ..., parallel=require("parallel"), n.cores=NULL) {
+setMethod("initialize", "genlight", function(.Object, ..., parallel=FALSE, n.cores=NULL) {
if(parallel && !require(parallel)) stop("parallel package requested but not installed")
if(parallel && is.null(n.cores)){
n.cores <- parallel::detectCores()
@@ -212,7 +212,7 @@ setMethod("initialize", "genlight", function(.Object, ..., parallel=require("par
}
##input$gen <- lapply(1:nrow(input$gen), function(i) as.integer(input$gen[i,]))
if(parallel){
- x at gen <- mclapply(1:nrow(input$gen), function(i) new("SNPbin", as.integer(input$gen[i,])),
+ x at gen <- parallel::mclapply(1:nrow(input$gen), function(i) new("SNPbin", as.integer(input$gen[i,])),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x at gen <- lapply(1:nrow(input$gen), function(i) new("SNPbin", as.integer(input$gen[i,])) )
@@ -240,7 +240,7 @@ setMethod("initialize", "genlight", function(.Object, ..., parallel=require("par
## create SNPbin list
if(parallel){
- x at gen <- mclapply(input$gen, function(e) new("SNPbin",e), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
+ x at gen <- parallel::mclapply(input$gen, function(e) new("SNPbin",e), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x at gen <- lapply(input$gen, function(e) new("SNPbin",e))
}
@@ -267,7 +267,7 @@ setMethod("initialize", "genlight", function(.Object, ..., parallel=require("par
## create SNPbin list
if(parallel){
- x at gen <- mclapply(1:nrow(input$gen), function(i) f1(input$gen[i,]), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
+ x at gen <- parallel::mclapply(1:nrow(input$gen), function(i) f1(input$gen[i,]), mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
x at gen <- lapply(1:nrow(input$gen), function(i) f1(input$gen[i,]))
}
@@ -936,8 +936,25 @@ setReplaceMethod("other","genlight",function(x,value) {
vecSnp <- as.integer(vecSnp)
##vecRaw <- integer(nbBytes) # no longer needed - sending raw type directly
vecRaw <- raw(nbBytes)
-
- vecRaw <- .C("binIntToBytes", vecSnp, length(vecSnp), vecRaw, nbBytes, PACKAGE="adegenet")[[3]]
+ # Wed Apr 12 08:26:49 2017 ------------------------------
+ # I changed this from the C function to the base R function `packBits()`
+ # because I was getting an error on solaris sparc. It suffers an increase
+ # of a fraction of a millisecond in processing time, but I think that's
+ # managable.
+ #
+ # ZNK
+ #
+ # library(microbenchmark)
+ # set.seed(5000)
+ # dat <- sample(c(0,1,NA), 1e5, prob=c(.495, .495, .01), replace=TRUE)
+ # y <- microbenchmark(C = .bin2raw(dat), base = .bin2raw_original(dat), times = 1000)
+ # print(y, "relative")
+ ## Unit: relative
+ ## expr min lq mean median uq max neval cld
+ ## C 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1000 a
+ ## base 1.074409 1.113972 1.103931 1.103837 1.083847 1.183597 1000 a
+ vecRaw <- packBits(vecSnp)
+ # vecRaw <- .C("binIntToBytes", vecSnp, length(vecSnp), vecRaw, nbBytes, PACKAGE="adegenet")[[3]]
## vecraw <- sapply(seq(1, by=8, length=nbBytes), function(i) which(apply(SNPCOMB,1, function(e) all(temp[i:(i+7)]==e))) ) # old R version
## return result
@@ -947,9 +964,6 @@ setReplaceMethod("other","genlight",function(x,value) {
-
-
-
###########
## .raw2bin
###########
@@ -960,6 +974,8 @@ setReplaceMethod("other","genlight",function(x,value) {
## colnames(SNPCOMB) <- NULL
## res <- unlist(lapply(as.integer(x), function(i) SNPCOMB[i+1,]))
res <- .C("bytesToBinInt", x, length(x), integer(length(x)*8), PACKAGE="adegenet")[[3]]
+ # Below is an equivalent function in base, but it's slowed down 8x
+ # res <- as.integer(rawToBits(x))
return(res)
} # end .raw2bin
@@ -971,11 +987,26 @@ setReplaceMethod("other","genlight",function(x,value) {
.SNPbin2int <- function(x){
##res <- lapply(x at snp, .raw2bin)
resSize <- length(x at snp[[1]])*8
+ # Wed Apr 12 08:49:02 2017 ------------------------------
+ # I am leaving this function along as it does not necessarily break solaris,
+ # but I am leaving the code and timings just in case.
+ #
+ # ZNK
res <- .C("bytesToInt", unlist(x at snp), length(x at snp[[1]]), length(x at snp), integer(resSize), as.integer(resSize), PACKAGE="adegenet")[[4]][1:nLoc(x)]
- ##res <- lapply(res, function(e) e[1:x at n.loc])
- ##res <- as.integer(Reduce("+", res))
- if(length(x at NA.posi)>0){
- res[x at NA.posi] <- NA
+ # library(microbenchmark)
+ # set.seed(5000)
+ # dat <- sample(c(0:2,NA), 1e5, prob=c(rep(.995/5,3), 0.005), replace=TRUE)
+ # x <- new("SNPbin", dat)
+ # y <- microbenchmark(C = .SNPbin2int(x), base = .SNPbin2int1(x), times = 1000)
+ # print(y, "relative")
+ ## Unit: relative
+ ## expr min lq mean median uq max neval cld
+ ## C 1.000000 1.000000 1.000000 1.000000 1.00000 1.0000000 1000 a
+ ## base 2.206831 1.200831 1.019783 1.168149 1.02654 0.1668163 1000 a
+ # res <- vapply(x at snp, function(x) as.integer(rawToBits(x)), integer(resSize))
+ # res <- apply(res[1:nLoc(x), ], 1, sum)
+ if (length(x at NA.posi) > 0){
+ res[x at NA.posi] <- NA_integer_
}
return(res)
} # end .SNPbin2int
@@ -1049,12 +1080,12 @@ as.list.genlight <- function(x, ...){
setGeneric("as.SNPbin", function(x, ...) standardGeneric("as.SNPbin"))
setGeneric("as.genlight", function(x, ...) standardGeneric("as.genlight"))
-setAs("integer", "SNPbin", def=function(from){
+setAs("integer", "SNPbin", def = function(from){
res <- new("SNPbin", from)
return(res)
})
-setAs("numeric", "SNPbin", def=function(from){
+setAs("numeric", "SNPbin", def = function(from){
res <- new("SNPbin", from)
return(res)
})
@@ -1093,6 +1124,33 @@ setMethod("as.genlight", "list", function(x, ...) as(x, "genlight"))
+setMethod("tab", "genlight",
+ function(x, freq = FALSE,
+ NA.method = c("mean", "asis", "zero")) {
+
+ NA.method <- match.arg(NA.method)
+ out <- as.matrix(x)
+
+ if (freq) {
+ out <- out / ploidy(x)
+ }
+
+ if (NA.method == "mean"){
+ f1 <- function(vec){
+ m <- mean(vec, na.rm = TRUE)
+ vec[is.na(vec)] <- m
+ return(vec)
+ }
+
+ out <- apply(out, 2, f1)
+
+ }
+ if (NA.method == "zero"){
+ out[is.na(out)] <- ifelse(freq, 0, 0L)
+ }
+
+ return(out)
+ })
diff --git a/R/adegenet.package.R b/R/adegenet.package.R
index 303aaba..842d6f5 100644
--- a/R/adegenet.package.R
+++ b/R/adegenet.package.R
@@ -91,34 +91,34 @@
#' \code{\link{pop}} sets the population of a set of genotypes.\cr
#'
#' === ANALYZING DATA ===\cr Several functions allow to use usual, and less
-#' usual analyses:\cr - \code{\link{HWE.test.genind}}: performs HWE test for
-#' all populations and loci combinations \cr - \code{\link{dist.genpop}}: computes 5 genetic distances among populations.
-#' \cr - \code{\link{monmonier}}: implementation of the Monmonier algorithm,
-#' used to seek genetic boundaries among individuals or populations. Optimized
-#' boundaries can be obtained using \code{\link{optimize.monmonier}}. Object of
-#' the class \code{monmonier} can be plotted and printed using the
-#' corresponding methods. \cr - \code{\link{spca}}: implements Jombart et al.
-#' (2008) spatial Principal Component Analysis \cr -
-#' \code{\link{global.rtest}}: implements Jombart et al. (2008) test for global
-#' spatial structures \cr - \code{\link{local.rtest}}: implements Jombart et
-#' al. (2008) test for local spatial structures \cr - \code{\link{propShared}}:
-#' computes the proportion of shared alleles in a set of genotypes (i.e. from a
-#' genind object)\cr - \code{\link{propTyped}}: function to investigate missing
-#' data in several ways \cr - \code{\link{scaleGen}}: generic method to scale
-#' \linkS4class{genind} or \linkS4class{genpop} before a principal component
-#' analysis \cr - \code{\link{Hs}}: computes the average expected
-#' heterozygosity by population in a \linkS4class{genpop}. Classically Used as
-#' a measure of genetic diversity.\cr - \code{\link{find.clusters}} and
-#' \code{\link{dapc}}: implement the Discriminant Analysis of Principal
-#' Component (DAPC, Jombart et al., 2010).\cr - \code{\link{seqTrack}}:
-#' implements the SeqTrack algorithm for recontructing transmission trees of
-#' pathogens (Jombart et al., 2010) .\cr \code{\link{glPca}}: implements PCA
-#' for \linkS4class{genlight} objects.\cr - \code{\link{gengraph}}: implements
-#' some simple graph-based clustering using genetic data. -
-#' \code{\link{snpposi.plot}} and \code{\link{snpposi.test}}: visualize the
-#' distribution of SNPs on a genetic sequence and test their randomness. -
-#' \code{\link{adegenetServer}}: opens up a web interface for some
-#' functionalities of the package (DAPC with cross validation and feature
+#' usual analyses:\cr - \code{\link{HWE.test.genind}}: performs HWE test for all
+#' populations and loci combinations \cr - \code{\link{dist.genpop}}: computes 5
+#' genetic distances among populations. \cr - \code{\link{monmonier}}:
+#' implementation of the Monmonier algorithm, used to seek genetic boundaries
+#' among individuals or populations. Optimized boundaries can be obtained using
+#' \code{\link{optimize.monmonier}}. Object of the class \code{monmonier} can be
+#' plotted and printed using the corresponding methods. \cr -
+#' \code{\link{spca}}: implements Jombart et al. (2008) spatial Principal
+#' Component Analysis \cr - \code{\link{global.rtest}}: implements Jombart et
+#' al. (2008) test for global spatial structures \cr -
+#' \code{\link{local.rtest}}: implements Jombart et al. (2008) test for local
+#' spatial structures \cr - \code{\link{propShared}}: computes the proportion of
+#' shared alleles in a set of genotypes (i.e. from a genind object)\cr -
+#' \code{\link{propTyped}}: function to investigate missing data in several ways
+#' \cr - \code{\link{scaleGen}}: generic method to scale \linkS4class{genind} or
+#' \linkS4class{genpop} before a principal component analysis \cr -
+#' \code{\link{Hs}}: computes the average expected heterozygosity by population
+#' in a \linkS4class{genpop}. Classically Used as a measure of genetic
+#' diversity.\cr - \code{\link{find.clusters}} and \code{\link{dapc}}: implement
+#' the Discriminant Analysis of Principal Component (DAPC, Jombart et al.,
+#' 2010).\cr - \code{\link{seqTrack}}: implements the SeqTrack algorithm for
+#' recontructing transmission trees of pathogens (Jombart et al., 2010) .\cr
+#' \code{\link{glPca}}: implements PCA for \linkS4class{genlight} objects.\cr -
+#' \code{\link{gengraph}}: implements some simple graph-based clustering using
+#' genetic data. - \code{\link{snpposi.plot}} and \code{\link{snpposi.test}}:
+#' visualize the distribution of SNPs on a genetic sequence and test their
+#' randomness. - \code{\link{adegenetServer}}: opens up a web interface for
+#' some functionalities of the package (DAPC with cross validation and feature
#' selection).\cr
#'
#' === GRAPHICS ===\cr - \code{\link{colorplot}}: plots points with associated
@@ -212,6 +212,15 @@
#' @S3method as.matrix genlight
#' @S3method as.matrix genpop
#' @S3method c SNPbin
+#' @S3method cbind SNPbin
+#' @S3method "[" haploGen
+#' @S3method cbind genlight
+#' @S3method rbind genlight
+#' @S3method xvalDapc data.frame
+#' @S3method xvalDapc default
+#' @S3method xvalDapc genind
+#' @S3method xvalDapc genlight
+#' @S3method xvalDapc matrix
#' @S3method colorplot default
#' @S3method colorplot spca
#' @S3method dapc data.frame
@@ -262,11 +271,17 @@
#' @S3method snpposi.test DNAbin
#' @S3method snpposi.test integer
#' @S3method snpposi.test numeric
+#' @S3method spca default
+#' @S3method spca matrix
+#' @S3method spca data.frame
+#' @S3method spca genind
+#' @S3method spca genpop
#' @S3method summary dapc
#' @S3method summary spca
#' @S3method print genindSummary
#' @S3method print genpopSummary
#'
+#'
#' @import methods
#'
#' @import parallel
@@ -285,15 +300,22 @@
#'
#' @importFrom MASS "lda"
#'
-#' @importFrom ape "as.character.DNAbin" "as.DNAbin" "as.DNAbin.alignment" "as.DNAbin.character" "as.DNAbin.list" "as.list.DNAbin" "as.matrix.DNAbin" "cbind.DNAbin" "c.DNAbin" "[.DNAbin" "labels.DNAbin" "print.DNAbin" "rbind.DNAbin" "dist.dna" "seg.sites"
+#' @importFrom ape "as.character.DNAbin" "as.DNAbin" "as.DNAbin.alignment"
+#' "as.DNAbin.character" "as.DNAbin.list" "as.list.DNAbin" "as.matrix.DNAbin"
+#' "cbind.DNAbin" "c.DNAbin" "[.DNAbin" "labels.DNAbin" "print.DNAbin"
+#' "rbind.DNAbin" "dist.dna" "seg.sites"
#'
-#' @importFrom igraph "graph.data.frame" "V" "V<-" "E" "E<-" "layout.fruchterman.reingold" "as.igraph" "plot.igraph" "print.igraph" "graph.adjacency" "clusters"
+#' @importFrom igraph "graph.data.frame" "V" "V<-" "E" "E<-"
+#' "layout.fruchterman.reingold" "as.igraph" "plot.igraph" "print.igraph"
+#' "graph.adjacency" "clusters"
#'
#' @importFrom shiny "runApp" "renderPrint"
#'
-#' @importFrom ggplot2 "ggplot" "geom_density" "geom_rug" "labs" "aes" "xlim" "guides" "guide_legend" "geom_boxplot" "geom_violin" "geom_jitter" "coord_flip"
+#' @importFrom ggplot2 "ggplot" "geom_density" "geom_rug" "labs" "aes" "xlim"
+#' "guides" "guide_legend" "geom_boxplot" "geom_violin" "geom_jitter"
+#' "coord_flip"
#'
-#' @useDynLib adegenet
+#' @useDynLib adegenet, .registration = TRUE
#'
diff --git a/R/auxil.R b/R/auxil.R
index 6250ecb..a084dd1 100644
--- a/R/auxil.R
+++ b/R/auxil.R
@@ -98,8 +98,14 @@ adegenetWeb <- function(){
#' @rdname web
#' @export
-adegenetTutorial <- function(which=c("basics","spca","dapc","genomics","strata")){
- which <- match.arg(which)
+adegenetTutorial <- function(which = c("basics","spca","dapc","genomics","strata")){
+
+ ## which <- match.arg(which)
+ which <- which[1]
+ choices <- c("basics","spca","dapc","genomics","strata","genclust")
+ if (!which %in% choices) {
+ stop("Unknown tutorial")
+ }
if(which=="basics"){
url <- "https://github.com/thibautjombart/adegenet/raw/master/tutorials/tutorial-basics.pdf"
cat("\n")
@@ -135,6 +141,13 @@ adegenetTutorial <- function(which=c("basics","spca","dapc","genomics","strata")
cat(" >> Seeking url: ",url,"\n ", sep="")
cat("\n")
}
+ if(which=="genclust"){
+ url <- "https://github.com/thibautjombart/adegenet/raw/master/tutorials/tutorial-genclust.pdf"
+ cat("\n")
+ cat(" >> Opening the genclust tutorial.\n")
+ cat(" >> Seeking url: ",url,"\n ", sep="")
+ cat("\n")
+ }
browseURL(url)
@@ -251,7 +264,7 @@ corner <- function(text, posi="topleft", inset=0.1, ...){
char <- c("?","??","?!?!?")
for(i in 1:3){
cat("\nGrind", char[i], " (y/N): ")
- x <- readLines(n=1)
+ x <- readLines(con = getOption('adegenet.testcon'), n=1)
if(x!="y") {
cat("\n =( \n")
return(invisible())
@@ -384,3 +397,21 @@ funky <- colorRampPalette(c("#A6CEE3","#1F78B4","#B2DF8A",
"#33A02C","#FB9A99","#E31A1C",
"#FDBF6F","#FF7F00","#CAB2D6",
"#6A3D9A","#FFFF99","#B15928"))
+
+
+
+## viridis
+virid <- colorRampPalette(c("#440154FF", "#482173FF", "#433E85FF", "#38598CFF",
+ "#2D708EFF", "#25858EFF", "#1E9B8AFF", "#2BB07FFF",
+ "#51C56AFF", "#85D54AFF", "#C2DF23FF", "#FDE725FF"))
+
+
+## reorder colors for hybrids
+hybridpal <- function(col.pal = virid) {
+ function(n) {
+ if (n < 3) {
+ return(col.pal(n))
+ }
+ col.pal(n)[c(1, n, 2:(n-1))]
+ }
+}
diff --git a/R/basicMethods.R b/R/basicMethods.R
index f54e864..3350a4c 100644
--- a/R/basicMethods.R
+++ b/R/basicMethods.R
@@ -37,10 +37,10 @@ setMethod("[", signature(x="genind", i="ANY", j="ANY", drop="ANY"), function(x,
old.i <- i
i <- match(i, indNames(x))
if(any(is.na(i))){
- warning(paste("the following specified individuals do not exist:", old.i[is.na(i)]))
+ warning(paste("the following specified individuals do not exist:", paste0(old.i[is.na(i)], collapse = ", ")), call. = FALSE)
i <- i[!is.na(i)]
if(length(i)==0) {
- warning("no individual selected - ignoring")
+ warning("no individual selected - ignoring", call. = FALSE)
i <- TRUE
}
}
@@ -52,7 +52,11 @@ setMethod("[", signature(x="genind", i="ANY", j="ANY", drop="ANY"), function(x,
if(!is.character(pop)) pop <- popNames(x)[pop]
temp <- !pop %in% pop(x)
if (any(temp)) { # if wrong population specified
- warning(paste("the following specified populations do not exist:", pop[temp]))
+ warning(paste("the following specified populations do not exist:", paste0(pop[temp], collapse = ", ")), call. = FALSE)
+ if (all(temp)){
+ warning("no populations selected - ignoring", call. = FALSE)
+ pop <- pop(x)
+ }
}
i <- pop(x) %in% pop
}
@@ -76,7 +80,11 @@ setMethod("[", signature(x="genind", i="ANY", j="ANY", drop="ANY"), function(x,
if(!is.character(loc)) loc <- locNames(x)[loc]
temp <- !loc %in% locFac(x)
if (any(temp)) { # if wrong loci specified
- warning(paste("the following specified loci do not exist:", loc[temp]))
+ warning(paste("the following specified loci do not exist:", paste0(loc[temp], collapse = ", ")), call. = FALSE)
+ if (all(temp)){
+ warning("no loci selected - ignoring", call. = FALSE)
+ loc <- x at loc.fac
+ }
}
j <- x$loc.fac %in% loc
} # end loc argument
@@ -154,10 +162,10 @@ setMethod("[", "genpop", function(x, i, j, ..., loc=NULL, treatOther=TRUE, drop=
old.i <- i
i <- match(i, popNames(x))
if(any(is.na(i))){
- warning(paste("the following specified populations do not exist:", old.i[is.na(i)]))
+ warning(paste("the following specified populations do not exist:", paste0(old.i[is.na(i)], collapse = ", ")), call. = FALSE)
i <- i[!is.na(i)]
if(length(i)==0) {
- warning("no population selected - ignoring")
+ warning("no population selected - ignoring", call. = FALSE)
i <- TRUE
}
}
@@ -169,7 +177,11 @@ setMethod("[", "genpop", function(x, i, j, ..., loc=NULL, treatOther=TRUE, drop=
if(!is.character(loc)) loc <- locNames(x)[loc]
temp <- !loc %in% locFac(x)
if(any(temp)) { # si mauvais loci
- warning(paste("the following specified loci do not exist:", loc[temp]))
+ warning(paste("the following specified loci do not exist:", paste0(loc[temp], collapse = ", ")), call. = FALSE)
+ }
+ if (all(temp)){
+ warning("no loci selected - ignoring", call. = FALSE)
+ loc <- x at loc.fac
}
j <- x$loc.fac %in% loc
} # end loc argument
@@ -206,7 +218,7 @@ setMethod("[", "genpop", function(x, i, j, ..., loc=NULL, treatOther=TRUE, drop=
} else if(length(obj) == n) { # if the element is not a matrix but has a length == n
obj <- obj[i]
if(is.factor(obj)) {obj <- factor(obj)}
- } else {warning(paste("cannot treat the object",namesOther[counter]))}
+ } else {warning(paste("cannot treat the object",namesOther[counter]), call. = FALSE)}
return(obj)
} # end f1
diff --git a/R/chooseCN.R b/R/chooseCN.R
index b8e6a42..480c9f8 100644
--- a/R/chooseCN.R
+++ b/R/chooseCN.R
@@ -49,10 +49,14 @@
#' plotted (TRUE, default) or not (FALSE).
#' @param edit.nb a logical stating whether the resulting graph should be
#' edited manually for corrections (TRUE) or not (FALSE, default).
+#' @param check.duplicates a logical indicating if duplicate coordinates should be detected; this can be an issue for some graphs; TRUE by default.
+#'
#' @return Returns a connection network having the class \code{nb} or
#' \code{listw}. The xy coordinates are passed as attribute to the created
#' object.
+#'
#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}
+#'
#' @seealso \code{\link{spca}}
#' @keywords spatial utilities
#' @examples
@@ -72,8 +76,10 @@
#' @importFrom spdep "tri2nb" "gabrielneigh" "graph2nb" "relativeneigh" "dnearneigh" "knearneigh" "knn2nb" "nb2listw" "mat2listw" "listw2mat" "lag.listw" "card"
#' @import ade4
#'
-chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL, k=NULL,
- a=NULL, dmin=NULL, plot.nb=TRUE, edit.nb=FALSE){
+chooseCN <- function(xy, ask = TRUE, type = NULL, result.type = "nb",
+ d1 = NULL, d2 = NULL, k = NULL, a = NULL,
+ dmin = NULL, plot.nb = TRUE, edit.nb = FALSE,
+ check.duplicates = TRUE){
if(is.data.frame(xy)) xy <- as.matrix(xy)
if(ncol(xy) != 2) stop("xy does not have two columns.")
@@ -110,7 +116,7 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
}
## check for uniqueness of coordinates
- if(any(xyTable(xy)$number>1)){ # if duplicate coords
+ if(check.duplicates && any(xyTable(xy)$number>1)){ # if duplicate coords
DUPLICATE.XY <- TRUE
} else {
DUPLICATE.XY <- FALSE
@@ -141,7 +147,7 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
cat("\t Inverse distances (type 7)\n")
cat("Answer: ")
- type <- as.integer(readLines(n = 1))
+ type <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
temp <- type < 1 |type > 7
if(temp) cat("\nWrong answer\n")
@@ -196,9 +202,9 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
dig <- options("digits")
options("digits=5")
cat("\n Enter minimum distance: ")
- d1 <- as.numeric(readLines(n = 1))
+ d1 <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
cat("\n Enter maximum distance \n(dmin=", d2min, ", dmax=", d2max, "): ")
- d2 <- readLines(n = 1)
+ d2 <- readLines(con = getOption('adegenet.testcon'), n = 1)
## handle character
if(d2=="dmin") {
d2 <- d2min
@@ -221,7 +227,7 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
if(type==6){
if(is.null(k)) {
cat("\n Enter the number of neighbours: ")
- k <- as.numeric(readLines(n = 1))
+ k <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
cn <- knearneigh(x=xy, k=k)
cn <- knn2nb(cn, sym=TRUE)
@@ -231,16 +237,16 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
if(type==7){
if(is.null(a)) {
cat("\n Enter the exponent: ")
- a <- as.numeric(readLines(n = 1))
+ a <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
cn <- as.matrix(dist(xy))
if(is.null(dmin)) {
cat("\n Enter the minimum distance \n(range = 0 -", max(cn),"): ")
- dmin <- as.numeric(readLines(n = 1))
+ dmin <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(a<1) { a <- 1 }
thres <- mean(cn)/1e8
- if(dmin > thres) dmin <- thres
+ if(dmin < thres) dmin <- thres
cn[cn < dmin] <- dmin
cn <- 1/(cn^a)
diag(cn) <- 0
@@ -255,7 +261,7 @@ chooseCN <- function(xy,ask=TRUE, type=NULL, result.type="nb", d1=NULL, d2=NULL,
if(ask & plot.nb) {
plot(cn,xy)
cat("\nKeep this graph (y/n)? ")
- ans <- tolower(readLines(n=1))
+ ans <- tolower(readLines(con = getOption('adegenet.testcon'), n=1))
if(ans=="n") {chooseAgain <- TRUE} else {chooseAgain <- FALSE}
}
else if(plot.nb){
diff --git a/R/compoplot.R b/R/compoplot.R
new file mode 100644
index 0000000..bd10ca7
--- /dev/null
+++ b/R/compoplot.R
@@ -0,0 +1,199 @@
+#' Genotype composition plot
+#'
+#' The compoplot uses a barplot to represent the group assignment probability of
+#' individuals to several groups. It is a generic with methods for the following
+#' objects:
+#'
+#' \itemize{
+#'
+#' \item \code{matrix}: a matrix with individuals in row and genetic clusters in
+#' column, each entry being an assignment probability of the corresponding
+#' individual to the corresponding group
+#'
+#' \item \code{dapc}: the output of the \code{dapc} function; in this case,
+#' group assignments are based upon geometric criteria in the discriminant space
+#'
+#' \item \code{snapclust}: the output of the \code{snapclust} function; in
+#' this case, group assignments are based upon the likelihood of genotypes
+#' belonging to their groups
+#'
+#' }
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @rdname compoplot
+#' @aliases compoplot
+#'
+#' @param x an object to be used for plotting (see description)
+#'
+#' @param ... further arguments to be passed to \code{barplot}
+#'
+compoplot <- function(x, ...){
+ UseMethod("compoplot", x)
+}
+
+
+#' Palette parser
+#'
+#' @param inPAL a palette function OR a character vector (named or unnamed)
+#' @param npop number of populations/colors desired
+#' @param pnames names of these populations if `inPal` is a function
+#'
+#' @md
+#' @return a named character vector specifying the colors for the palette.
+#' @keywords internal
+#'
+#' @note This was originally from the poppr package [commit a0818eed6](https://github.com/grunwaldlab/poppr/commit/a0818eed6a72d9145e46da73715dc22be0640b0c)
+#'
+#' @examples
+#' palette_parser(rainbow, 5, letters[1:5])
+#' palette_parser(colors()[1:5], 5, letters[1:5])
+.palette_parser <- function(inPAL, npop, pnames){
+ PAL <- try(match.fun(inPAL, descend = FALSE), silent = TRUE)
+ if ("try-error" %in% class(PAL)){
+ if (all(pnames %in% names(inPAL))){
+ color <- inPAL[pnames]
+ } else if (npop == length(inPAL)){
+ color <- stats::setNames(inPAL, pnames)
+ } else if (npop < length(inPAL)){
+ warning("Number of populations fewer than number of colors supplied. Discarding extra colors.")
+ color <- stats::setNames(inPAL[1:npop], pnames)
+ } else {
+ warning("insufficient color palette supplied. Using funky().")
+ color <- stats::setNames(funky(npop), pnames)
+ }
+ } else {
+ color <- stats::setNames(PAL(npop), pnames)
+ }
+ return(color)
+}
+
+
+#' @rdname compoplot
+#'
+#' @aliases compoplot.matrix
+#' @export
+#'
+#' @param col.pal a color palette to be used for the groups; defaults to \code{funky}
+#'
+#' @param border a color for the border of the barplot; use \code{NA} to
+#' indicate no border.
+#'
+#' @param show.lab a logical indicating if individual labels should be displayed
+#'
+#' @param lab a vector of individual labels; if NULL, row.names of the matrix are used
+#'
+#' @param legend a logical indicating whether a legend should be provided for the colors
+#'
+#' @param txt.leg a character vector to be used for the legend
+#'
+#' @param n.col the number of columns to be used for the legend
+#'
+#' @param posi the position of the legend
+#'
+#' @param cleg a size factor for the legend
+#'
+#' @param bg the background to be used for the legend
+#'
+#' @param subset a subset of individuals to retain
+#'
+compoplot.matrix <- function(x, col.pal = funky, border = NA,
+ subset = NULL, show.lab = FALSE,
+ lab = rownames(x), legend = TRUE,
+ txt.leg = colnames(x), n.col = 4,
+ posi = NULL, cleg = .8,
+ bg = transp("white"), ...) {
+
+ ## individual labels
+ if (!show.lab) {
+ lab <- rep("", nrow(x))
+ }
+
+ ## handle subset
+ if (!is.null(subset)) {
+ names(lab) <- rownames(x)
+ x <- x[subset, , drop=FALSE]
+ lab <- lab[rownames(x)]
+ }
+
+ ## group labels
+ if (is.null(txt.leg)) {
+ txt.leg <- colnames(x)
+ }
+ if (is.null(txt.leg)) {
+ txt.leg <- seq_len(ncol(x))
+ }
+ ## generate colors, process arguments
+ col <- .palette_parser(col.pal, ncol(x), txt.leg)
+
+ ## position of the legend
+ if (is.null(posi)) {
+ posi <- list(x=0, y=-.01)
+ }
+
+ ## make the plot: we need to suppress warnings because '...' could contain
+ ## arguments from other methods not meant to be used by 'barplot'
+
+ suppressWarnings(
+ out <- barplot(t(x), col = col,
+ ylab = "membership probability",
+ names.arg = lab, las = 3,
+ border = border, ...) )
+
+ if (legend) {
+ oxpd <- par("xpd")
+ par(xpd=TRUE)
+ legend(posi, fill=col, legend = txt.leg,
+ cex = cleg, ncol = n.col, bg = bg)
+ on.exit(par(xpd=oxpd))
+ }
+
+ return(invisible(out))
+}
+
+
+
+
+
+#' @rdname compoplot
+#' @aliases compoplot.dapc
+#' @export
+#' @param only.grp a subset of groups to retain
+
+## The compoplot for DAPC is basically a compoplot.matrix on the predicted group membership
+## probabilities. Only extra features related to keeping a subset of groups or individuals.
+
+compoplot.dapc <- function(x, only.grp=NULL, border = NA, ...){
+ ## get predictions and subset if needed
+ pred <- predict(x)$posterior
+
+ ## handle group subsetting
+ if (!is.null(only.grp)) {
+ if(is.numeric(only.grp) || is.logical(only.grp)) {
+ only.grp <- levels(x$grp)[only.grp]
+ }
+ to.keep <- as.character(x$grp) %in% only.grp
+ pred <- pred[to.keep, , drop=FALSE]
+ lab <- lab[to.keep]
+ }
+
+ ## call matrix method
+ compoplot(pred, border = border, ...)
+
+} # end compoplot
+
+
+
+
+
+
+
+
+
+#' @rdname compoplot
+#' @export
+compoplot.snapclust <- function(x, border = NA, ...) {
+ compoplot(x$proba, border = border, ...)
+}
diff --git a/R/constructors.R b/R/constructors.R
index 1ee32b3..d8742e4 100644
--- a/R/constructors.R
+++ b/R/constructors.R
@@ -151,8 +151,12 @@ setMethod("initialize", "genind", function(.Object, tab, pop=NULL, prevcall=NULL
## beware, keep levels of pop sorted in
## there order of appearance
if(!is.null(pop)) {
- # convert pop to a factor if it is not
- if(!is.factor(pop)) {pop <- factor(pop)}
+ ## convert pop to a factor if it is not; for consistentcy with the accessor 'pop', we need
+ ## to make sure that levels will be sorted by their order of appearance
+ if(!is.factor(pop)) {
+ pop <- as.character(pop)
+ pop <- factor(pop, levels=unique(pop))
+ }
out at pop <- pop
}
diff --git a/R/dapc.R b/R/dapc.R
index 76fc410..d35b89b 100644
--- a/R/dapc.R
+++ b/R/dapc.R
@@ -42,13 +42,13 @@ dapc.data.frame <- function(x, grp, n.pca=NULL, n.da=NULL,
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
- n.pca <- as.integer(readLines(n = 1))
+ n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
- nperc.pca <- as.numeric(readLines(n = 1))
+ nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
@@ -63,7 +63,6 @@ dapc.data.frame <- function(x, grp, n.pca=NULL, n.da=NULL,
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
if(n.pca >= N) n.pca <- N-1
- if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
n.pca <- round(n.pca)
U <- pcaX$c1[, 1:n.pca, drop=FALSE] # principal axes
@@ -83,7 +82,7 @@ dapc.data.frame <- function(x, grp, n.pca=NULL, n.da=NULL,
if(is.null(n.da)){
barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(grp))) )
cat("Choose the number discriminant functions to retain (>=1): ")
- n.da <- as.integer(readLines(n = 1))
+ n.da <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
##n.da <- min(n.da, length(levels(grp))-1, n.pca) # can't be more than K-1 disc. func., or more than n.pca
@@ -170,10 +169,10 @@ dapc.genind <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
## SOME GENERAL VARIABLES
- N <- nrow(x at tab)
+ N <- nInd(x)
## PERFORM PCA ##
- maxRank <- min(dim(x at tab))
+ maxRank <- min(tab(x))
X <- scaleGen(x, center = TRUE, scale = scale,
NA.method = "mean")
@@ -263,13 +262,13 @@ dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
- n.pca <- as.integer(readLines(n = 1))
+ n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
- nperc.pca <- as.numeric(readLines(n = 1))
+ nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
@@ -297,7 +296,6 @@ dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
X.rank <- sum(pcaX$eig > 1e-14)
n.pca <- min(X.rank, n.pca)
if(n.pca >= N) n.pca <- N-1
- if(n.pca > N/3) warning("number of retained PCs of PCA may be too large (> N /3)\n results may be unstable ")
U <- pcaX$loadings[, 1:n.pca, drop=FALSE] # principal axes
XU <- pcaX$scores[, 1:n.pca, drop=FALSE] # principal components
@@ -315,7 +313,7 @@ dapc.genlight <- function(x, pop=NULL, n.pca=NULL, n.da=NULL,
if(is.null(n.da)){
barplot(ldaX$svd^2, xlab="Linear Discriminants", ylab="F-statistic", main="Discriminant analysis eigenvalues", col=heat.colors(length(levels(pop.fac))) )
cat("Choose the number discriminant functions to retain (>=1): ")
- n.da <- as.integer(readLines(n = 1))
+ n.da <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
n.da <- min(n.da, length(levels(pop.fac))-1, n.pca, sum(ldaX$svd>1e-10)) # can't be more than K-1 disc. func., or more than n.pca
@@ -496,14 +494,26 @@ summary.dapc <- function(object, ...){
## scatter.dapc
##############
#' @importFrom vegan orditorp
-scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(grp))), pch=20, bg="white", solid=.7,
- scree.da=TRUE, scree.pca=FALSE, posi.da="bottomright", posi.pca="bottomleft", bg.inset="white",
- ratio.da=.25, ratio.pca=.25, inset.da=0.02, inset.pca=0.02, inset.solid=.5,
- onedim.filled=TRUE, mstree=FALSE, lwd=1, lty=1, segcol="black",
- legend=FALSE, posi.leg="topright", cleg=1, txt.leg=levels(grp),
- cstar = 1, cellipse = 1.5, axesell = FALSE, label = levels(grp), clabel = 1, xlim = NULL, ylim = NULL,
- grid = FALSE, addaxes = TRUE, origin = c(0,0), include.origin = TRUE, sub = "", csub = 1, possub = "bottomleft",
- cgrid = 1, pixmap = NULL, contour = NULL, area = NULL, label.inds = NULL, ...){
+#'
+scatter.dapc <- function(x, xax = 1, yax = 2, grp = x$grp,
+ col = seasun(length(levels(grp))),
+ pch = 20, bg = "white", solid = .7,
+ scree.da = TRUE, scree.pca = FALSE,
+ posi.da = "bottomright",
+ posi.pca = "bottomleft",
+ bg.inset = "white",
+ ratio.da = .25, ratio.pca = .25,
+ inset.da = 0.02, inset.pca = 0.02, inset.solid = .5,
+ onedim.filled = TRUE, mstree = FALSE,
+ lwd = 1, lty = 1, segcol = "black",
+ legend = FALSE, posi.leg = "topright",
+ cleg = 1, txt.leg = levels(grp),
+ cstar = 1, cellipse = 1.5, axesell = FALSE,
+ label = levels(grp), clabel = 1, xlim = NULL, ylim = NULL,
+ grid = FALSE, addaxes = TRUE, origin = c(0,0),
+ include.origin = TRUE, sub = "", csub = 1, possub = "bottomleft",
+ cgrid = 1, pixmap = NULL, contour = NULL,
+ area = NULL, label.inds = NULL, ...){
ONEDIM <- xax==yax | ncol(x$ind.coord)==1
@@ -534,9 +544,14 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
axes <- c(xax,yax)
## basic empty plot
- s.class(x$ind.coord[,axes], fac=grp, col=col, cpoint=0, cstar = cstar, cellipse = cellipse, axesell = axesell, label = label,
- clabel = clabel, xlim = xlim, ylim = ylim, grid = grid, addaxes = addaxes, origin = origin, include.origin = include.origin,
- sub = sub, csub = csub, possub = possub, cgrid = cgrid, pixmap = pixmap, contour = contour, area = area)
+ s.class(x$ind.coord[,axes], fac = grp, col = col, cpoint = 0,
+ cstar = cstar, cellipse = cellipse, axesell = axesell,
+ label = label, clabel = clabel, xlim = xlim, ylim = ylim,
+ grid = grid, addaxes = addaxes, origin = origin,
+ include.origin = include.origin,
+ sub = sub, csub = csub, possub = possub,
+ cgrid = cgrid, pixmap = pixmap,
+ contour = contour, area = area)
## add points
colfac <- pchfac <- grp
@@ -547,31 +562,42 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
if(is.numeric(col)) colfac <- as.numeric(colfac)
if(is.numeric(pch)) pchfac <- as.numeric(pchfac)
- points(x$ind.coord[,xax], x$ind.coord[,yax], col=colfac, pch=pchfac, ...)
- s.class(x$ind.coord[,axes], fac=grp, col=col, cpoint=0, add.plot=TRUE, cstar = cstar, cellipse = cellipse, axesell = axesell, label = label,
- clabel = clabel, xlim = xlim, ylim = ylim, grid = grid, addaxes = addaxes, origin = origin, include.origin = include.origin,
- sub = sub, csub = csub, possub = possub, cgrid = cgrid, pixmap = pixmap, contour = contour, area = area)
+ points(x$ind.coord[,xax],
+ x$ind.coord[,yax],
+ col = colfac, pch = pchfac, ...)
+ s.class(x$ind.coord[,axes], fac = grp, col = col, cpoint = 0,
+ add.plot=TRUE, cstar = cstar, cellipse = cellipse,
+ axesell = axesell, label = label,clabel = clabel,
+ xlim = xlim, ylim = ylim, grid = grid,
+ addaxes = addaxes, origin = origin,
+ include.origin = include.origin,
+ sub = sub, csub = csub, possub = possub,
+ cgrid = cgrid, pixmap = pixmap,
+ contour = contour, area = area)
+
+ ## Add labels of individuals if specified. Play around with "air" to get
+ ## a satisfactory result.
- # Add labels of individuals if specified. Play around with "air" to get
- # a satisfactory result.
if (!is.null(label.inds) & is.list(label.inds)) {
- appendList <- function (x, val) {
- # recursevly "bind" a list into a longer list,
- # from http://stackoverflow.com/a/9519964/322912
- stopifnot(is.list(x), is.list(val))
- xnames <- names(x)
- for (v in names(val)) {
- x[[v]] <- if (v %in% xnames && is.list(x[[v]]) && is.list(val[[v]]))
- appendList(x[[v]], val[[v]])
- else c(x[[v]], val[[v]])
+ appendList <- function (x, val) {
+ # recursevly "bind" a list into a longer list,
+ # from http://stackoverflow.com/a/9519964/322912
+ stopifnot(is.list(x), is.list(val))
+ xnames <- names(x)
+ for (v in names(val)) {
+ x[[v]] <- if (v %in% xnames && is.list(x[[v]]) && is.list(val[[v]]))
+ appendList(x[[v]], val[[v]])
+ else c(x[[v]], val[[v]])
+ }
+ x
}
- x
- }
-
- do.call("orditorp", c(appendList(list(x = x$ind.coord[, c(xax, yax)], display = "species"),
- label.inds)))
+
+ do.call("orditorp",
+ c(appendList(list(x = x$ind.coord[, c(xax, yax)],
+ display = "species"),
+ label.inds)))
}
-
+
## add minimum spanning tree if needed
if(mstree){
meanposi <- apply(x$tab,2, tapply, grp, mean)
@@ -581,7 +607,7 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
y0 <- x$grp.coord[tre[,1], axes[2]]
x1 <- x$grp.coord[tre[,2], axes[1]]
y1 <- x$grp.coord[tre[,2], axes[2]]
- segments(x0, y0, x1, y1, lwd=lwd, lty=lty, col=segcol)
+ segments(x0, y0, x1, y1, lwd = lwd, lty = lty, col = segcol)
}
} else {
@@ -599,14 +625,20 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
allx <- unlist(lapply(ldens, function(e) e$x))
ally <- unlist(lapply(ldens, function(e) e$y))
par(bg=bg)
- plot(allx, ally, type="n", xlab=paste("Discriminant function", pcLab), ylab="Density")
+ plot(allx, ally, type = "n",
+ xlab = paste("Discriminant function", pcLab),
+ ylab = "Density")
for(i in 1:length(ldens)){
if(!onedim.filled) {
- lines(ldens[[i]]$x,ldens[[i]]$y, col=col[i], lwd=2) # add lines
+ lines(ldens[[i]]$x, ldens[[i]]$y, col = col[i], lwd = 2) # add lines
} else {
- polygon(c(ldens[[i]]$x,rev(ldens[[i]]$x)),c(ldens[[i]]$y,rep(0,length(ldens[[i]]$x))), col=col[i], lwd=2, border=col[i]) # add lines
+ polygon(c(ldens[[i]]$x, rev(ldens[[i]]$x)),
+ c(ldens[[i]]$y, rep(0,length(ldens[[i]]$x))),
+ col = col[i], lwd = 2, border = col[i]) # add lines
}
- points(x=x$ind.coord[grp==levels(grp)[i],pcLab], y=rep(0, sum(grp==levels(grp)[i])), pch="|", col=col[i]) # add points for indiv
+ points(x = x$ind.coord[grp==levels(grp)[i], pcLab],
+ y = rep(0, sum(grp==levels(grp)[i])),
+ pch = "|", col = col[i]) # add points for indiv
}
}
@@ -616,10 +648,12 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
## add a legend
temp <- list(...)$cex
if(is.null(temp)) temp <- 1
- if(ONEDIM | temp<0.5 | all(pch=="")) {
- legend(posi.leg, fill=col, legend=txt.leg, cex=cleg, bg=bg.inset)
+ if(ONEDIM | temp<0.5 | all(pch == "")) {
+ legend(posi.leg, fill = col, legend = txt.leg,
+ cex = cleg, bg = bg.inset)
} else {
- legend(posi.leg, col=col, legend=txt.leg, cex=cleg, bg=bg.inset, pch=pch, pt.cex=temp)
+ legend(posi.leg, col = col, legend = txt.leg, cex = cleg,
+ bg = bg.inset, pch = pch, pt.cex = temp)
}
}
@@ -635,8 +669,8 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
box()
}
- add.scatter(inset(), posi=posi.da, ratio=ratio.da, bg.col=bg.inset, inset=inset.da)
- ##add.scatter.eig(x$eig, ncol(x$loadings), axes[1], axes[2], posi=posi, ratio=ratio, csub=csub) # does not allow for bg
+ add.scatter(inset(), posi = posi.da, ratio = ratio.da,
+ bg.col = bg.inset, inset = inset.da)
}
## eigenvalues PCA
@@ -649,7 +683,8 @@ scatter.dapc <- function(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(gr
type="h", xaxt="n", yaxt="n", xlab="", ylab="", lwd=2)
mtext(side=3, "PCA eigenvalues", line=-1.2, adj=.1)
}
- add.scatter(inset(), posi=posi.pca, ratio=ratio.pca, bg.col=bg.inset, inset=inset.pca)
+ add.scatter(inset(), posi = posi.pca, ratio = ratio.pca,
+ bg.col = bg.inset, inset = inset.pca)
}
@@ -720,83 +755,6 @@ assignplot <- function(x, only.grp=NULL, subset=NULL, new.pred=NULL, cex.lab=.75
-############
-## compoplot
-############
-compoplot <- function(x, only.grp=NULL, subset=NULL, new.pred=NULL, col=NULL, lab=NULL,
- legend=TRUE, txt.leg=NULL, ncol=4, posi=NULL, cleg=.8, bg=transp("white"), ...){
- if(!inherits(x, "dapc")) stop("x is not a dapc object")
-
-
- ## HANDLE ARGUMENTS ##
- ngrp <- length(levels(x$grp))
-
- ## col
- if(is.null(col)){
- col <- rainbow(ngrp)
- }
-
- ## lab
- if(is.null(lab)){
- lab <- rownames(x$tab)
- } else {
- ## recycle labels
- lab <- rep(lab, le=nrow(x$tab))
- }
-
- ## posi
- if(is.null(posi)){
- posi <- list(x=0, y=-.01)
- }
-
- ## txt.leg
- if(is.null(txt.leg)){
- txt.leg <- levels(x$grp)
- }
-
- ## HANDLE DATA FROM PREDICT.DAPC ##
- if(!is.null(new.pred)){
- n.new <- length(new.pred$assign)
- x$grp <- c(as.character(x$grp), rep("unknown", n.new))
- x$assign <- c(as.character(x$assign), as.character(new.pred$assign))
- x$posterior <- rbind(x$posterior, new.pred$posterior)
- lab <- c(lab, rownames(new.pred$posterior))
- }
-
-
- ## TREAT OTHER ARGUMENTS ##
- if(!is.null(only.grp)){
- only.grp <- as.character(only.grp)
- ori.grp <- as.character(x$grp)
- x$grp <- x$grp[only.grp==ori.grp]
- x$assign <- x$assign[only.grp==ori.grp]
- x$posterior <- x$posterior[only.grp==ori.grp, , drop=FALSE]
- lab <- lab[only.grp==ori.grp]
- } else if(!is.null(subset)){
- x$grp <- x$grp[subset]
- x$assign <- x$assign[subset]
- x$posterior <- x$posterior[subset, , drop=FALSE]
- lab <- lab[subset]
- }
-
-
- ## MAKE THE PLOT ##
- Z <- t(x$posterior)
- barplot(Z, border=NA, col=col, ylab="membership probability", names=lab, las=3, ...)
-
- if(legend){
- oxpd <- par("xpd")
- par(xpd=TRUE)
- legend(posi, fill=col, leg=txt.leg, cex=cleg, ncol=ncol, bg=bg)
- on.exit(par(xpd=oxpd))
- }
-
- return(invisible(match.call()))
-} # end compoplot
-
-
-
-
###############
## a.score
@@ -975,7 +933,17 @@ predict.dapc <- function(object, newdata, prior = object$prior, dimen,
if(!missing(newdata)){
## make a few checks
if(is.null(object$pca.loadings)) stop("DAPC object does not contain loadings of original variables. \nPlease re-run DAPC using 'pca.loadings=TRUE'.")
- newdata <- as.matrix(newdata) # to force conversion, notably from genlight objects
+ ## We need to convert the data as they were converted during the analysis. Behaviour is:
+ ## - genind: allele frequencies, missing data = mean
+ ## - genlight: allele frequencies, missing data = mean
+
+ if (is.genind(newdata)) { # genind object
+ newdata <- tab(newdata, freq = TRUE, NA.method = "mean")
+ } else if (inherits(newdata, "genlight")) { # genlight object
+ newdata <- as.matrix(newdata) / ploidy(newdata)
+ } else { # any other type of object
+ newdata <- as.matrix(newdata)
+ }
if(ncol(newdata) != nrow(object$pca.loadings)) stop("Number of variables in newdata does not match original data.")
## centre/scale data
@@ -1021,77 +989,6 @@ predict.dapc <- function(object, newdata, prior = object$prior, dimen,
-############
-## xvalDapc
-############
-
-xvalDapc <- function (x, ...) UseMethod("xvalDapc")
-
-xvalDapc.data.frame <- function(x, grp, n.pca.max, n.da=NULL, training.set = 0.9,
- result=c("groupMean","overall"),
- center=TRUE, scale=FALSE, n.pca=NULL, n.rep=10, ...){
-
- ## CHECKS ##
- grp <- factor(grp)
- n.pca <- n.pca[n.pca>0]
- result <- match.arg(result)
- if(is.null(n.da)) {
- n.da <- length(levels(grp))-1
- }
-
- ## GET TRAINING SET SIZE ##
- N <- nrow(x)
- N.training <- round(N*training.set)
-
- ## GET FULL PCA ##
- if(missing(n.pca.max)) n.pca.max <- min(dim(x))
- pcaX <- dudi.pca(x, nf=n.pca.max, scannf=FALSE, center=center, scale=scale)
- n.pca.max <- min(n.pca.max,pcaX$rank,N.training-1)
-
- ## DETERMINE N.PCA IF NEEDED ##
- if(is.null(n.pca)){
- n.pca <- round(pretty(1:n.pca.max,10))
- }
- n.pca <- n.pca[n.pca>0 & n.pca<(N.training-1)]
-
- ## FUNCTION GETTING THE % OF ACCURATE PREDICTION FOR ONE NUMBER OF PCA PCs ##
- ## n.pca is a number of retained PCA PCs
- VOID.GRP <- FALSE # will be TRUE if empty group happened
- get.prop.pred <- function(n.pca){
- f1 <- function(){
- toKeep <- sample(1:N, N.training)
- if(!(all(table(grp[toKeep])>0) & all(table(grp[-toKeep])>0))) VOID.GRP <<- TRUE
- temp.pca <- pcaX
- temp.pca$li <- temp.pca$li[toKeep,,drop=FALSE]
- temp.dapc <- suppressWarnings(dapc(x[toKeep,,drop=FALSE], grp[toKeep], n.pca=n.pca, n.da=n.da, dudi=temp.pca))
- temp.pred <- predict.dapc(temp.dapc, newdata=x[-toKeep,,drop=FALSE])
- if(result=="overall"){
- out <- mean(temp.pred$assign==grp[-toKeep])
- }
- if(result=="groupMean"){
- out <- mean(tapply(temp.pred$assign==grp[-toKeep], grp[-toKeep], mean), na.rm=TRUE)
- }
- return(out)
- }
- return(replicate(n.rep, f1()))
- }
-
-
- ## GET %SUCCESSFUL OF ACCURATE PREDICTION FOR ALL VALUES ##
- res.all <- unlist(lapply(n.pca, get.prop.pred))
- if(VOID.GRP) warning("At least one group was absent from the training / validating sets.\nTry using smaller training sets.")
- res <- data.frame(n.pca=rep(n.pca, each=n.rep), success=res.all)
- return(res)
-} # end xvalDapc.data.frame
-
-
-xvalDapc.matrix <- xvalDapc.data.frame
-
-
-
-
-
-
## #############
## ## discriVal
## #############
diff --git a/R/datasets.R b/R/datasets.R
index 7d798a6..a24cc84 100644
--- a/R/datasets.R
+++ b/R/datasets.R
@@ -743,3 +743,39 @@ NULL
+#' Toy hybrid dataset
+#' @name hybridtoy
+#' @aliases hybridtoy
+#' @docType data
+#' @format a \linkS4class{genind} object
+#' @author Data simulated by Marie-Pauline Beugin. Example by Thibaut Jombart.
+#'
+#' @examples
+#' data(hybridtoy)
+#' x <- hybridtoy
+#' pca1 <- dudi.pca(tab(x), scannf=FALSE, scale=FALSE)
+#' s.class(pca1$li, pop(x))
+#'
+#' if(require(ggplot2)) {
+#' p <- ggplot(pca1$li, aes(x=Axis1)) +
+#' geom_density(aes(fill=pop(x)), alpha=.4, adjust=1) +
+#' geom_point(aes(y=0, color=pop(x)), pch="|", size=10, alpha=.5)
+#' p
+#' }
+#'
+#' ## kmeans
+#' km <- find.clusters(x, n.pca=10, n.clust=2)
+#' table(pop(x), km$grp)
+#'
+#' ## dapc
+#' dapc1 <- dapc(x, pop=km$grp, n.pca=10, n.da=1)
+#' scatter(dapc1)
+#' scatter(dapc1, grp=pop(x))
+#' compoplot(dapc1, col.pal=spectral, n.col=2)
+#'
+#' ## ML-EM with hybrids
+#' res <- snapclust(x, k=2, hybrids=TRUE, detailed=TRUE)
+#' compoplot(res, n.col=3)
+#' table(res$group, pop(x))
+#'
+NULL
diff --git a/R/dist.genlight.R b/R/dist.genlight.R
new file mode 100644
index 0000000..174fe22
--- /dev/null
+++ b/R/dist.genlight.R
@@ -0,0 +1,33 @@
+#' @title Distance matrices from genlight objects
+#'
+#' @name dist.genlight
+#' @rdname dist.genlight
+#'
+#' @description
+#' Create distance matrices from genlight objects
+#'
+#' @details
+#'
+#' The creation of distance matrices, matrices of numbers that describe how different each of the samples are, is a fundamental task in the statistical analysis of individuals or populations (i.e., groups of individuals).
+#' However, there isn't actually a function that creates distance matrices from genlight objects in adegenet.
+#' Instead, the authors of adegenet created an `as.matrix()` function that converts a genlight object to a matrix.
+#' This is clever because the function `dist()` in the package `stats` tries to convert whatever object it is given to a matrix.
+#' The result is that when you call `dist()` on a genlight object it uses the `dist()` function to create a distance matrix.
+#' The reason this is clever is because it uses pre-existing code.
+#' The downside is that because there is no function to specifically create distance matrices from genlight objects in adegenet, there is no documentation in genlight for how this is done.
+#' And because the author of `dist()` never anticipated it could be used on genlight objects, there is no documentation for it there either.
+#' And we can find documentation for this function with `?dist`.
+#' To summarize, we can create a distance matrix from a genlight object using `dist()`.
+#'
+#'
+#' There are also functions to create distance matrices from genlight objects that exist in other packages.
+#' The function `bitwise.dist()` in the package [poppr](https://CRAN.R-project.org/package=poppr) is an example.
+#' We can find documentation for this function with `?poppr::bitwise.dist`.
+#' Again, a downside of this is that you need to know where to look for this information or you may not find it.
+#'
+#'
+#' Lastly, because you can use `as.matrix()` on your genlight object, and most distance algorithms can use this matrix as input, you can use this as an intermediate step to create a matrix from your genlight object and pass it to your distance algorithm of choice.
+#' Options include [ade4](https://CRAN.R-project.org/package=ade4), `vegdist()` in [vegan](https://CRAN.R-project.org/package=vegan), or `daisy()` in [cluster](https://CRAN.R-project.org/package=cluster).
+#' Note that it is up to you to determine which distance metric is best for your analysis.
+#' A number of options therefore exist for creating distance matrices from genlight objects.
+
diff --git a/R/dist.genpop.R b/R/dist.genpop.R
index d58f596..f3af5b5 100644
--- a/R/dist.genpop.R
+++ b/R/dist.genpop.R
@@ -36,7 +36,7 @@ dist.genpop <- function(x, method = 1, diag = FALSE, upper = FALSE) {
cat("4 = Rodgers 1972\n")
cat("5 = Provesti 1975\n")
cat("Select an integer (1-5): ")
- method <- as.integer(readLines(n = 1))
+ method <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if (all((1:5)!=method)) (stop ("Non convenient method number"))
diff --git a/R/doc_C_routines.R b/R/doc_C_routines.R
new file mode 100644
index 0000000..5da0eae
--- /dev/null
+++ b/R/doc_C_routines.R
@@ -0,0 +1,15 @@
+#' Internal C routines
+#'
+#' These functions are internal C routines used in adegenet. Do not use them
+#' unless you know what you are doing.
+#'
+#' @aliases CheckAllSeg GLdotProd GLsumFreq GLsumInt binIntToBytes bytesToBinInt
+#' bytesToInt nb_shared_all
+#'
+#' @author Thibaut Jombart
+#'
+#' @export
+#'
+#' @rdname doc_C_routines
+#'
+".internal_C_routines" <- NULL
diff --git a/R/export_to_mvmapper.R b/R/export_to_mvmapper.R
new file mode 100644
index 0000000..4fd0471
--- /dev/null
+++ b/R/export_to_mvmapper.R
@@ -0,0 +1,271 @@
+#' Export analysis for mvmapper visualisation
+#'
+#' \code{mvmapper} is an interactive tool for visualising outputs of a
+#' multivariate analysis on a map from a web browser. The function
+#' \code{export_to_mvmapper} is a generic with methods for several standard
+#' classes of analyses in \code{adegenet} and \code{ade4}. Information on
+#' individual locations, as well as any other relevant data, is passed through
+#' the second argument \code{info}. By default, the function returns a formatted
+#' \code{data.frame} and writes the output to a .csv file.\cr
+#'
+#' \code{mvmapper} can be found at:
+#' \url{https://popphylotools.github.io/mvMapper/}
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#'
+#' @param x The analysis to be exported. Can be a \code{dapc}, \code{spca}, or a
+#' \code{dudi} object.
+#'
+#' @param info A \code{data.frame} with additional information containing at
+#' least the following columns: \code{key} (unique individual identifier),
+#' \code{lat} (latitude), and \code{lon} (longitude). Other columns will be
+#' exported as well, but are optional.
+#'
+#'
+#' @param write_file A \code{logical} indicating if the output should be written
+#' out to a .csv file. Defaults to \code{TRUE}.
+#'
+#' @param out_file A character string indicating the file to which the output
+#' should be written. If NULL, the file used will be named
+#' \code{'mvmapper_data_[date and time].csv'}
+#'
+#' @param ... Further arguments to pass to other methods.
+#'
+#' @return A \code{data.frame} which can serve as input to \code{mvmapper},
+#' containing at least the following columns:
+#'
+#' \itemize{
+#'
+#' \item \code{key}: unique individual identifiers
+#'
+#' \item \code{PC1}: first principal component; further principal components are
+#' optional, but if provided will be numbered and follow \code{PC1}.
+#'
+#' \item \code{lat}: latitude for each individual
+#'
+#' \item \code{lon}: longitude for each individual
+#'
+#' }
+#'
+#' In addition, specific information is added for some analyses:
+#'
+#' \itemize{
+#'
+#' \item \code{spca}: \code{Lag_PC} columns contain the lag-vectors of the
+#' principal components; the lag operator computes, for each individual, the
+#' average score of neighbouring individuals; it is useful for clarifying
+#' patches and clines.
+#'
+#' \item \code{dapc}: \code{grp} is the group used in the analysis;
+#' \code{assigned_grp} is the group assignment based on the discriminant
+#' functions; \code{support} is the statistical support (i.e. assignment
+#' probability) for \code{assigned_grp}.
+#'
+#' }
+#'
+#'
+#'
+#' @export
+#'
+#' @rdname export_to_mvmapper
+#'
+#' @seealso
+#'
+#' \code{mvmapper} is available at:
+#' \url{https://popphylotools.github.io/mvMapper/}
+#'
+
+
+export_to_mvmapper <- function(x, ...) {
+ UseMethod("export_to_mvmapper")
+}
+
+
+
+
+
+#' @export
+#' @rdname export_to_mvmapper
+
+export_to_mvmapper.default <- function(x, ...) {
+ msg <- sprintf("No method available for the class %s",
+ paste(class(x), collapse = ", "))
+ stop(msg)
+}
+
+
+
+
+
+
+## All method will consist in merging output from the analysis with extra info
+## containing latitude and longitude, stored in 'info'.
+
+#' @export
+#' @rdname export_to_mvmapper
+#' @examples
+#'
+#' data(sim2pop)
+#'
+#' dapc1 <- dapc(sim2pop, n.pca = 10, n.da = 1)
+#'
+#' info <- data.frame(key = indNames(sim2pop),
+#' lat = other(sim2pop)$xy[,2],
+#' lon = other(sim2pop)$xy[,1],
+#' Population = pop(sim2pop))
+#'
+#' out <- export_to_mvmapper(dapc1, info, write_file = FALSE)
+#' head(out)
+
+export_to_mvmapper.dapc <- function(x, info, write_file = TRUE, out_file = NULL, ...) {
+
+ ## Extract principal components, groups, assigned groups and the corresponding
+ ## probability.
+
+ pcs <- x$ind.coord
+ colnames(pcs) <- paste0("PC", 1:ncol(pcs))
+ key <- rownames(pcs)
+ grp <- x$grp
+ assigned_grp <- x$assign
+ support <- apply(x$posterior, 1, max)
+
+
+ analysis <- cbind.data.frame(key, pcs,
+ grp,
+ assigned_grp,
+ support)
+
+ ## process 'info' (checks that required columns are there)
+ info <- .check_info(info, key)
+
+ out <- merge(analysis, info, by = "key")
+ .write_mvmapper_output(out, write_file, out_file)
+ return(out)
+}
+
+
+
+
+
+
+#' @export
+#' @rdname export_to_mvmapper
+
+export_to_mvmapper.dudi <- function(x, info, write_file = TRUE, out_file = NULL, ...) {
+
+ ## Extract principal components, groups, assigned groups and the corresponding
+ ## probability.
+
+ pcs <- x$li
+ colnames(pcs) <- paste0("PC", 1:ncol(pcs))
+ key <- rownames(pcs)
+
+ analysis <- cbind.data.frame(key, pcs)
+
+ ## process 'info' (checks that required columns are there)
+ info <- .check_info(info, key)
+
+ out <- merge(analysis, info, by = "key")
+ .write_mvmapper_output(out, write_file, out_file)
+ return(out)
+}
+
+
+
+
+
+
+#' @export
+#' @rdname export_to_mvmapper
+#' @examples
+#'
+#' data(rupica)
+#'
+#' spca1 <- spca(rupica, type=5, d1 = 0, d2 = 2300,
+#' plot = FALSE, scannf = FALSE,
+#' nfposi = 2,nfnega = 0)
+#'
+#' info <- data.frame(key = indNames(rupica),
+#' lat = rupica$other$xy[,2],
+#' lon = rupica$other$xy[,1])
+#'
+#' out <- export_to_mvmapper(spca1, info, write_file = FALSE)
+#' head(out)
+#'
+
+export_to_mvmapper.spca <- function(x, info, write_file = TRUE, out_file = NULL, ...) {
+
+ ## Extract principal components, groups, assigned groups and the corresponding
+ ## probability.
+
+ pcs <- x$li
+ colnames(pcs) <- paste0("PC", 1:ncol(pcs))
+ lag_pcs <- x$ls
+ colnames(lag_pcs) <- paste0("Lag_PC", 1:ncol(pcs))
+ key <- rownames(pcs)
+
+ analysis <- cbind.data.frame(key, pcs, lag_pcs)
+
+ ## process 'info' (checks that required columns are there)
+ info <- .check_info(info, key)
+
+ out <- merge(analysis, info, by = "key")
+ .write_mvmapper_output(out, write_file, out_file)
+ return(out)
+}
+
+
+
+
+
+
+## This internal function will merely check the content of the extra 'info'
+## being provided, making sure key, latitude and longitude are provided.
+
+.check_info <- function(info, ref_keys,
+ look_for = c("key", "lat", "lon")) {
+
+ info <- as.data.frame(info)
+
+ if (length(look_for) > 0L) {
+ for (e in look_for) {
+ if (!e %in% names(info)) {
+ msg <- sprintf("'info' is missing a '%s' column", e)
+ stop(msg)
+ }
+ }
+ }
+
+ nb_missing <- sum(!ref_keys %in% info$key)
+ if (nb_missing > 0L) {
+ msg <- sprintf("%d individuals are not documented in 'info'",
+ nb_missing)
+ warning(msg)
+ }
+
+ return(info)
+}
+
+
+
+
+
+
+## This internal function writes results to a csv file if needed, and does
+## nothing otherwise.
+##
+## 'x' is the data.frame output from the export function
+## other arguments as documented
+##
+.write_mvmapper_output <- function(x, write_file = TRUE, out_file = NULL) {
+ if (write_file) {
+ if (is.null(out_file)) {
+ out_file <- paste0("mvmapper_data_",
+ gsub(" ", "_", Sys.time()),
+ ".csv")
+ }
+ message("Writing output to the file: ", out_file)
+ write.csv(x, out_file, row.names = FALSE)
+ }
+}
diff --git a/R/find.clust.R b/R/find.clust.R
index b5c636d..8a8b0d7 100644
--- a/R/find.clust.R
+++ b/R/find.clust.R
@@ -6,10 +6,15 @@ find.clusters <- function (x, ...) UseMethod("find.clusters")
############################
## find.clusters.data.frame
############################
-find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat=c("BIC", "AIC", "WSS"), choose.n.clust=TRUE,
- criterion=c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
- max.n.clust=round(nrow(x)/10), n.iter=1e5, n.start=10, center=TRUE, scale=TRUE,
- pca.select=c("nbEig","percVar"), perc.pca=NULL,..., dudi=NULL){
+find.clusters.data.frame <- function(x, clust = NULL, n.pca = NULL, n.clust = NULL,
+ method = c("kmeans", "ward"),
+ stat = c("BIC", "AIC", "WSS"), choose.n.clust = TRUE,
+ criterion = c("diffNgroup", "min", "goesup",
+ "smoothNgoesup", "goodfit"),
+ max.n.clust = round(nrow(x)/10), n.iter = 1e5,
+ n.start = 10, center = TRUE, scale = TRUE,
+ pca.select = c("nbEig","percVar"),
+ perc.pca = NULL, ..., dudi = NULL){
## CHECKS ##
stat <- match.arg(stat)
@@ -17,7 +22,8 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
criterion <- match.arg(criterion)
min.n.clust <- 2
max.n.clust <- max(max.n.clust, 2)
-
+ method <- match.arg(method)
+
## KEEP TRACK OF SOME ORIGINAL PARAMETERS
## n.pca.ori <- n.pca
##n.clust.ori <- n.clust
@@ -25,8 +31,14 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
## ESCAPE IF SUB-CLUST ARE SEEKED ##
if(!is.null(clust)){
- res <- .find.sub.clusters(x=x, clust=clust, n.pca=n.pca, n.clust=n.clust, stat=stat, max.n.clust=max.n.clust, n.iter=n.iter, n.start=n.start,
- choose.n.clust=choose.n.clust, criterion=criterion, center=center, scale=scale)
+ res <- .find.sub.clusters(x = x, clust = clust, n.pca = n.pca,
+ n.clust = n.clust, stat = stat,
+ max.n.clust = max.n.clust,
+ n.iter = n.iter, n.start = n.start,
+ choose.n.clust = choose.n.clust,
+ criterion = criterion,
+ method = method,
+ center = center, scale = scale)
return(res)
}
## END SUB-CLUST
@@ -52,16 +64,22 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
}
## select the number of retained PC for PCA
- if(is.null(n.pca) & pca.select=="nbEig"){
- plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
- cat("Choose the number PCs to retain (>=1): ")
- n.pca <- as.integer(readLines(n = 1))
+ if(is.null(n.pca) & pca.select == "nbEig"){
+ plot(cumVar, xlab = "Number of retained PCs",
+ ylab = "Cumulative variance (%)",
+ main = "Variance explained by PCA",
+ col = myCol)
+ cat("Choose the number PCs to retain (>= 1): ")
+ n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
- if(is.null(perc.pca) & pca.select=="percVar"){
- plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
+ if(is.null(perc.pca) & pca.select == "percVar"){
+ plot(cumVar, xlab = "Number of retained PCs",
+ ylab = "Cumulative variance (%)",
+ main = "Variance explained by PCA",
+ col = myCol)
cat("Choose the percentage of variance to retain (0-100): ")
- nperc.pca <- as.numeric(readLines(n = 1))
+ nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
@@ -86,9 +104,17 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
WSS <- numeric(0)
for(i in 1:length(nbClust)){
- ## temp <- kmeans(XU, centers=nbClust[i], iter.max=min(n.iter, 100), nstart=min(n.start, 1e3))
- temp <- kmeans(XU, centers=nbClust[i], iter.max=n.iter, nstart=n.start)
- WSS[i] <- sum(temp$withinss)
+ if (method == "kmeans") {
+ ## kmeans clustering (original method)
+ temp <- kmeans(XU, centers = nbClust[i], iter.max = n.iter, nstart = n.start)
+ ##WSS[i] <- sum(temp$withinss)
+ } else {
+ ## ward clustering
+ temp <- list()
+ temp$cluster <- cutree(hclust(dist(XU)^2, method = "ward.D2"), k = nbClust[i])
+ }
+ WSS[i] <- .compute.wss(XU, temp$cluster)
+
}
@@ -124,12 +150,13 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
}
if(choose.n.clust){
- plot(c(1,nbClust), myStat, xlab="Number of clusters", ylab=myLab, main=myTitle, type="o", col="blue")
+ plot(c(1,nbClust), myStat, xlab = "Number of clusters",
+ ylab = myLab, main = myTitle, type = "o", col = "blue")
abline(h=0, lty=2, col="red")
cat("Choose the number of clusters (>=2: ")
n.clust <- NA
while(is.na(n.clust)){
- n.clust <- max(1, as.integer(readLines(n = 1)))
+ n.clust <- max(1, as.integer(readLines(con = getOption('adegenet.testcon'), n = 1)))
}
} else {
if(criterion=="min") {
@@ -145,13 +172,14 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
n.clust <- min( which(myStat < temp))-1
}
if(criterion=="diffNgroup") {
- temp <- cutree(hclust(dist(diff(myStat)), method="ward"), k=2)
+ temp <- cutree(hclust(dist(diff(myStat)), method="ward.D"), k=2)
goodgrp <- which.min(tapply(diff(myStat), temp, mean))
n.clust <- max(which(temp==goodgrp))+1
}
if(criterion=="smoothNgoesup") {
temp <- myStat
- temp[2:(length(myStat)-1)] <- sapply(1:(length(myStat)-2), function(i) mean(myStat[c(i,i+1,i+2)]))
+ temp[2:(length(myStat)-1)] <- sapply(1:(length(myStat)-2),
+ function(i) mean(myStat[c(i,i+1,i+2)]))
n.clust <- min(which(diff(temp)>0))
}
@@ -162,7 +190,16 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
## get final groups
if(n.clust >1){
- best <- kmeans(XU, centers=n.clust, iter.max=n.iter, nstart=n.start)
+ if (method == "kmeans") {
+ best <- kmeans(XU, centers = n.clust, iter.max = n.iter,
+ nstart = n.start)
+ } else {
+ best <- list()
+ best$cluster <- cutree(hclust(dist(XU)^2, method = "ward.D2"),
+ k = n.clust)
+ best$size <- table(best$cluster)
+ }
+
} else {
best <- list(cluster=factor(rep(1,N)), size=N)
}
@@ -186,10 +223,13 @@ find.clusters.data.frame <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, st
########################
## find.clusters.genind
########################
-find.clusters.genind <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat=c("BIC", "AIC", "WSS"), choose.n.clust=TRUE,
- criterion=c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
- max.n.clust=round(nrow(x at tab)/10), n.iter=1e5, n.start=10,
- scale=FALSE, truenames=TRUE, ...){
+find.clusters.genind <- function(x, clust = NULL, n.pca = NULL, n.clust = NULL,
+ method = c("kmeans", "ward"),
+ stat = c("BIC", "AIC", "WSS"),
+ choose.n.clust=TRUE,
+ criterion = c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
+ max.n.clust = round(nrow(x at tab)/10), n.iter = 1e5, n.start = 10,
+ scale = FALSE, truenames = TRUE, ...){
## CHECKS ##
if(!is.genind(x)) stop("x must be a genind object.")
@@ -207,8 +247,10 @@ find.clusters.genind <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat=c
NA.method = "mean")
## CALL DATA.FRAME METHOD
- res <- find.clusters(X, clust=clust, n.pca=n.pca, n.clust=n.clust, stat=stat, max.n.clust=max.n.clust, n.iter=n.iter, n.start=n.start,
- choose.n.clust=choose.n.clust, criterion=criterion, center=FALSE, scale=FALSE,...)
+ res <- find.clusters(X, clust=clust, n.pca=n.pca, n.clust=n.clust, stat=stat,
+ max.n.clust=max.n.clust, n.iter=n.iter, n.start=n.start,
+ choose.n.clust=choose.n.clust, method = method,
+ criterion=criterion, center=FALSE, scale=FALSE,...)
return(res)
} # end find.clusters.genind
@@ -233,10 +275,14 @@ find.clusters.matrix <- function(x, ...){
##########################
## find.clusters.genlight
##########################
-find.clusters.genlight <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat=c("BIC", "AIC", "WSS"), choose.n.clust=TRUE,
- criterion=c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
- max.n.clust=round(nInd(x)/10), n.iter=1e5, n.start=10,
- scale=FALSE, pca.select=c("nbEig","percVar"), perc.pca=NULL, glPca=NULL, ...){
+find.clusters.genlight <- function(x, clust = NULL, n.pca = NULL, n.clust = NULL,
+ method = c("kmeans", "ward"),
+ stat = c("BIC", "AIC", "WSS"),
+ choose.n.clust = TRUE,
+ criterion = c("diffNgroup", "min","goesup", "smoothNgoesup", "goodfit"),
+ max.n.clust = round(nInd(x)/10), n.iter = 1e5, n.start = 10,
+ scale = FALSE, pca.select = c("nbEig","percVar"),
+ perc.pca = NULL, glPca = NULL, ...){
## CHECKS ##
if(!inherits(x, "genlight")) stop("x is not a genlight object.")
@@ -274,13 +320,13 @@ find.clusters.genlight <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat
if(is.null(n.pca) & pca.select=="nbEig"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the number PCs to retain (>=1): ")
- n.pca <- as.integer(readLines(n = 1))
+ n.pca <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(perc.pca) & pca.select=="percVar"){
plot(cumVar, xlab="Number of retained PCs", ylab="Cumulative variance (%)", main="Variance explained by PCA", col=myCol)
cat("Choose the percentage of variance to retain (0-100): ")
- nperc.pca <- as.numeric(readLines(n = 1))
+ nperc.pca <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## get n.pca from the % of variance to conserve
@@ -302,8 +348,10 @@ find.clusters.genlight <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat
## CALL DATA.FRAME METHOD
- res <- find.clusters(pcaX$li, clust=clust, n.pca=n.pca, n.clust=n.clust, stat=stat, max.n.clust=max.n.clust, n.iter=n.iter, n.start=n.start,
- choose.n.clust=choose.n.clust, criterion=criterion, center=FALSE, scale=FALSE, dudi=pcaX)
+ res <- find.clusters(pcaX$li, clust=clust, n.pca=n.pca, n.clust=n.clust,
+ stat=stat, max.n.clust=max.n.clust, n.iter=n.iter, n.start=n.start,
+ choose.n.clust=choose.n.clust, method = method,
+ criterion=criterion, center=FALSE, scale=FALSE, dudi=pcaX)
return(res)
} # end find.clusters.genlight
@@ -342,7 +390,8 @@ find.clusters.genlight <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat
## find sub clusters
for(i in levels(clust)){
if(!quiet) cat("\nLooking for sub-clusters in cluster",i,"\n")
- myArgs$x <- x[clust==i, ]
+ myArgs$x <- x[clust==i, , drop = FALSE]
+ myArgs$max.n.clust <- nrow(x[clust==i, , drop = FALSE]) - 1
temp <- do.call(find.clusters, myArgs)$grp
levels(temp) <- paste(i, levels(temp), sep=".")
newFac[clust==i] <- as.character(temp)
@@ -355,3 +404,10 @@ find.clusters.genlight <- function(x, clust=NULL, n.pca=NULL, n.clust=NULL, stat
+
+
+## Compute within sum of squares from a matrix 'x' and a factor 'f'
+.compute.wss <- function(x, f) {
+ x.group.mean <- apply(x, 2, tapply, f, mean)
+ sum((x - x.group.mean[as.character(f),])^2)
+}
diff --git a/R/gengraph.R b/R/gengraph.R
index b9ae4e2..2370299 100644
--- a/R/gengraph.R
+++ b/R/gengraph.R
@@ -50,7 +50,7 @@ gengraph.matrix <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TR
}
cat("\nPlease choose a cutoff distance: ")
ans <- NA
- while(is.null(ans) || is.na(ans)) suppressWarnings(ans <- as.numeric(readLines(n = 1)))
+ while(is.null(ans) || is.na(ans)) suppressWarnings(ans <- as.numeric(readLines(con = getOption('adegenet.testcon'), n = 1)))
if(plot){
abline(v=ans,col="red",lty=2, lwd=2)
}
@@ -64,7 +64,7 @@ gengraph.matrix <- function(x, cutoff=NULL, ngrp=NULL, computeAll=FALSE, plot=TR
ans <- ""
while(!ans %in% c("y","n")){
cat("\nAre you satisfied with this solution? (yes:y / no:n): ")
- ans <- tolower(readLines(n = 1))
+ ans <- tolower(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(ans=="y") chooseAgain <- FALSE
}
diff --git a/R/glFunctions.R b/R/glFunctions.R
index 2eb087d..ab4dd47 100644
--- a/R/glFunctions.R
+++ b/R/glFunctions.R
@@ -172,7 +172,7 @@ glVar <- function(x, alleleAsUnit=TRUE){
## between centred/scaled vectors
## of SNPs
glDotProd <- function(x, center=FALSE, scale=FALSE, alleleAsUnit=FALSE,
- parallel=require("parallel"), n.cores=NULL){
+ parallel=FALSE, n.cores=NULL){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
## SOME CHECKS ##
@@ -277,7 +277,7 @@ glDotProd <- function(x, center=FALSE, scale=FALSE, alleleAsUnit=FALSE,
## PCA for genlight objects
##
glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsUnit=FALSE,
- useC=TRUE, parallel=require("parallel"), n.cores=NULL,
+ useC=TRUE, parallel=FALSE, n.cores=NULL,
returnDotProd=FALSE, matDotProd=NULL){
if(!inherits(x, "genlight")) stop("x is not a genlight object")
@@ -350,7 +350,7 @@ glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsU
a <- as.integer(a) / ploid.a
a[is.na(a)] <- vecMeans[is.na(a)]
b <- as.integer(b) / ploid.b
- a[is.na(a)] <- vecMeans[is.na(a)]
+ b[is.na(b)] <- vecMeans[is.na(b)]
return( sum( ((a-vecMeans)*(b-vecMeans))/vecVar, na.rm=TRUE ) )
}
}
@@ -359,7 +359,7 @@ glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsU
## COMPUTE ALL POSSIBLE DOT PRODUCTS (XX^T / n) ##
allComb <- combn(1:nInd(x), 2)
if(parallel){
- allProd <- unlist(mclapply(1:ncol(allComb), function(i) dotProd(x at gen[[allComb[1,i]]], x at gen[[allComb[2,i]]], myPloidy[allComb[1,i]], myPloidy[allComb[2,i]]),
+ allProd <- unlist(parallel::mclapply(1:ncol(allComb), function(i) dotProd(x at gen[[allComb[1,i]]], x at gen[[allComb[2,i]]], myPloidy[allComb[1,i]], myPloidy[allComb[2,i]]),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE))
} else {
allProd <- unlist(lapply(1:ncol(allComb), function(i) dotProd(x at gen[[allComb[1,i]]], x at gen[[allComb[2,i]]], myPloidy[allComb[1,i]], myPloidy[allComb[2,i]]) ))
@@ -375,7 +375,7 @@ glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsU
## compute the diagonal
if(parallel){
- temp <- unlist(mclapply(1:nInd(x), function(i) dotProd(x at gen[[i]], x at gen[[i]], myPloidy[i], myPloidy[i]),
+ temp <- unlist(parallel::mclapply(1:nInd(x), function(i) dotProd(x at gen[[i]], x at gen[[i]], myPloidy[i], myPloidy[i]),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE))/nInd(x)
} else {
temp <- unlist(lapply(1:nInd(x), function(i) dotProd(x at gen[[i]], x at gen[[i]], myPloidy[i], myPloidy[i]) ))/nInd(x)
@@ -400,7 +400,7 @@ glPca <- function(x, center=TRUE, scale=FALSE, nf=NULL, loadings=TRUE, alleleAsU
if(is.null(nf)){
barplot(eigRes$values, main="Eigenvalues", col=heat.colors(rank))
cat("Select the number of axes: ")
- nf <- as.integer(readLines(n = 1))
+ nf <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## rescale PCs
diff --git a/R/glHandle.R b/R/glHandle.R
index 055cd1b..eaa1376 100644
--- a/R/glHandle.R
+++ b/R/glHandle.R
@@ -107,7 +107,8 @@ setMethod("[", signature(x="SNPbin", i="ANY"), function(x, i) {
## genlight
-setMethod("[", signature(x="genlight", i="ANY", j="ANY", drop="ANY"), function(x, i, j, ..., treatOther=TRUE, quiet=TRUE, drop=FALSE) {
+setMethod("[", signature(x = "genlight", i = "ANY", j = "ANY", drop = "ANY"),
+ function(x, i, j, ..., treatOther = TRUE, quiet = TRUE, drop = FALSE) {
if (missing(i)) i <- TRUE
if (missing(j)) j <- TRUE
@@ -375,7 +376,7 @@ setMethod("seppop", signature(x="genlight"), function(x, pop=NULL, treatOther=TR
## seploc
##########
setMethod("seploc", signature(x="genlight"), function(x, n.block=NULL, block.size=NULL, random=FALSE,
- parallel=require(parallel), n.cores=NULL){
+ parallel=FALSE, n.cores=NULL){
## CHECKS ##
if(is.null(n.block) & is.null(block.size)) stop("n.block and block.size are both missing.")
if(!is.null(n.block) & !is.null(block.size)) stop("n.block and block.size are both provided.")
@@ -414,10 +415,10 @@ setMethod("seploc", signature(x="genlight"), function(x, n.block=NULL, block.siz
if(parallel){
if(random){
- res <- mclapply(levels(fac.block), function(lev) x[, sample(which(fac.block==lev))],
+ res <- parallel::mclapply(levels(fac.block), function(lev) x[, sample(which(fac.block==lev))],
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
} else {
- res <- mclapply(levels(fac.block), function(lev) x[, which(fac.block==lev)],
+ res <- parallel::mclapply(levels(fac.block), function(lev) x[, which(fac.block==lev)],
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE)
}
} else {
diff --git a/R/spca.rtests.R b/R/global_local_tests.R
similarity index 100%
rename from R/spca.rtests.R
rename to R/global_local_tests.R
diff --git a/R/handling.R b/R/handling.R
index 2608806..eb7566a 100644
--- a/R/handling.R
+++ b/R/handling.R
@@ -64,7 +64,7 @@ setMethod("truenames",signature(x="genpop"), function(x){
#'
setGeneric("tab", function(x, ...) standardGeneric("tab"))
-.tabGetter <- function(x, freq=FALSE, NA.method=c("asis","mean","zero"), ...){
+.tabGetter <- function(x, freq = FALSE, NA.method = c("asis","mean","zero"), ...){
## handle arguments
NA.method <- match.arg(NA.method)
# outdim <- dim(x at tab)
@@ -76,9 +76,9 @@ setGeneric("tab", function(x, ...) standardGeneric("tab"))
}
## replace NAs if needed
- if(NA.method=="mean"){
+ if (NA.method == "mean"){
f1 <- function(vec){
- m <- mean(vec,na.rm=TRUE)
+ m <- mean(vec, na.rm = TRUE)
vec[is.na(vec)] <- m
return(vec)
}
@@ -86,7 +86,7 @@ setGeneric("tab", function(x, ...) standardGeneric("tab"))
out <- apply(out, 2, f1)
}
- if(NA.method=="zero"){
+ if (NA.method == "zero"){
out[is.na(out)] <- ifelse(freq, 0, 0L)
}
# dim(out) <- outdim
@@ -121,7 +121,7 @@ setMethod("tab", signature(x="genpop"), function(x, freq=FALSE, NA.method=c("asi
## compute frequencies
fac <- x at loc.fac
if (is.null(fac)) fac <- rep(1, nLoc(x))
- out <- apply(x at tab, 1, tapply, fac, f1)
+ out <- apply(x at tab, 1, tapply, fac, f1, simplify = FALSE)
if (ncol(x at tab) > 1){
## reshape into matrix
col.names <- do.call(c,lapply(out[[1]],names))
diff --git a/R/haploGen.R b/R/haploGen.R
index c47dba1..e62bf7c 100644
--- a/R/haploGen.R
+++ b/R/haploGen.R
@@ -36,7 +36,9 @@ haploGen <- function(seq.length=1e4, mu.transi=1e-4, mu.transv=mu.transi/2, t.ma
NUCL <- as.DNAbin(c("a","t","c","g"))
TRANSISET <- list('a'=as.DNAbin('g'), 'g'=as.DNAbin('a'), 'c'=as.DNAbin('t'), 't'=as.DNAbin('c'))
TRANSVSET <- list('a'=as.DNAbin(c('c','t')), 'g'=as.DNAbin(c('c','t')), 'c'=as.DNAbin(c('a','g')), 't'=as.DNAbin(c('a','g')))
- res <- list(seq=as.matrix(as.DNAbin(character(0))), dates=integer(), ances=character())
+ res <- list(seq = as.matrix(as.DNAbin(matrix(character(0), nrow = 0, ncol = 0))),
+ dates = integer(),
+ ances = character())
toExpand <- logical()
myGrid <- matrix(1:grid.size^2, ncol=grid.size, nrow=grid.size)
diff --git a/R/import.R b/R/import.R
index d6fb438..16ceff6 100644
--- a/R/import.R
+++ b/R/import.R
@@ -22,19 +22,22 @@
#'
#' The function \code{df2genind} converts a data.frame (or a matrix) into a
#' \linkS4class{genind} object. The data.frame must meet the following
-#' requirements:\cr
-#' - genotypes are in row (one row per genotype)\cr
-#' - markers/loci are in columns\cr
-#' - each element is a string of characters coding alleles, ideally separated by a character string (argument \code{sep});
-#' if no separator is used, the number of characters coding alleles must be indicated (argument \code{ncode}).\cr
-#'
-#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to such a
-#' data.frame.
+#' requirements:
+#' \itemize{
+#' \item genotypes are in row (one row per genotype)
+#' \item markers/loci are in columns
+#' \item each element is a string of characters coding alleles, ideally
+#' separated by a character string (argument \code{sep}); if no separator is
+#' used, the number of characters coding alleles must be indicated (argument
+#' \code{ncode}).}
+#'
+#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to
+#' such a data.frame.
#'
#' === Details for the \code{sep} argument ===\cr this character is directly
-#' used in reguar expressions like \code{gsub}, and thus require some
-#' characters to be preceeded by double backslashes. For instance, "/" works
-#' but "|" must be coded as "\\|".
+#' used in reguar expressions like \code{gsub}, and thus require some characters
+#' to be preceeded by double backslashes. For instance, "/" works but "|" must
+#' be coded as "\\|".
#'
#' @aliases df2genind
#' @param X a matrix or a data.frame containing allelle data only (see
@@ -43,12 +46,14 @@
#' @param ncode an optional integer giving the number of characters used for
#' coding one genotype at one locus. If not provided, this is determined from
#' data.
-#' @param ind.names optinal, a vector giving the individuals names; if NULL, taken
-#' from rownames of X. If factor or numeric, vector is converted to character.
+#' @param ind.names optinal, a vector giving the individuals names; if NULL,
+#' taken from rownames of X. If factor or numeric, vector is converted to
+#' character.
#' @param loc.names an optional character vector giving the markers names; if
#' NULL, taken from colnames of X.
#' @param pop an optional factor giving the population of each individual.
-#' @param NA.char a character string corresponding to missing allele (to be treated as NA)
+#' @param NA.char a character string corresponding to missing allele (to be
+#' treated as NA)
#' @param ploidy an integer indicating the degree of ploidy of the genotypes.
#' @param type a character string indicating the type of marker: 'codom' stands
#' for 'codominant' (e.g. microstallites, allozymes); 'PA' stands for
@@ -60,7 +65,7 @@
#' levels in your strata. see \code{\link{hierarchy}} for details.
#'
#' @return an object of the class \linkS4class{genind} for \code{df2genind}; a
-#' matrix of biallelic genotypes for \code{genind2df}
+#' matrix of biallelic genotypes for \code{genind2df}
#'
#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}, Zhian N. Kamvar
#' \email{kamvarz@@science.oregonstate.edu}
@@ -156,13 +161,18 @@ df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
colnames(X) <- loc.names
- ## pop argument
- if(!is.null(pop)){
- if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
- pop <- as.factor(pop)
+ ## check alleles for periods
+ if (length(grep("[.]", X)) > 0L){
+ if (is.null(sep) || sep != "_"){
+ warning("character '.' detected in names of loci; replacing with '_'")
+ replacement <- "_"
+ } else {
+ warning("character '.' detected in names of loci; replacing with 'p'")
+ replacement <- "p"
+ }
+ X <- apply(X, 2, function(i) gsub("[.]", replacement, i))
}
-
## PRESENCE/ABSENCE MARKERS ##
if(toupper(type)=="PA"){
## preliminary stuff
@@ -291,9 +301,9 @@ df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
dimnames(out) <- list(rownames(out), colnames(out))
## restore NAs
- ##
+ ##
## Thanks to Klaus Schliep for the proposed speedup:
- ##
+ ##
# if (length(NA.posi) > 0) {
# out.colnames <- colnames(out)
# NA.row <- match(NA.ind, rownames(out))
@@ -302,27 +312,40 @@ df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
# loc.list <- lapply(uloc, grep, out.colnames)
# NA.col <- match(loc, uloc)
# out[cbind(rep(NA.row, unlist(lapply(loc.list, length))[NA.col]), unlist(loc.list[NA.col]))] <- NA
- # }
- ## This one is modified from above to make everything more explicit.
+ # }
+ ## This one is modified from above to make everything more explicit.
if (length(NA.posi) > 0) {
out.colnames <- colnames(out)
NA.row <- match(NA.ind, rownames(out))
loc <- paste0(NA.locus, "\\.")
uloc <- unique(loc)
- loc.list <- lapply(uloc, grep, out.colnames)
+ loc.list <- lapply(uloc, FUN = function(x, y) {
+ grep(pattern = paste("^", x, sep = ""), x = out.colnames, perl = TRUE)
+ }, y = out.colnames)
NA.col <- match(loc, uloc)
-
+
# Coordinates for missing rows
missing.ind <- vapply(loc.list, length, integer(1))[NA.col]
missing.ind <- rep(NA.row, missing.ind)
# Coordinates for missing columns
missing.loc <- unlist(loc.list[NA.col], use.names = FALSE)
-
+
missing_coordinates <- matrix(0L, nrow = length(missing.ind), ncol = 2L)
missing_coordinates[, 1] <- missing.ind
missing_coordinates[, 2] <- missing.loc
-
+ # [,1] [,2]
+ # [1,] 2 1
+ # [2,] 3 1
+ # [3,] 4 13
+ # [4,] 4 14
+
out[missing_coordinates] <- NA
+
+ # X1401_25.33
+ # A_KH1584 1
+ # C_KH1059 1
+ # M_KH1834 1
+ # M_KH1837 1
}
@@ -429,8 +452,9 @@ read.genetix <- function(file=NULL,quiet=FALSE) {
colnames(X) <- loc.names
- ## make a factor "pop" if there is more than one population
- pop <- factor(rep(pop.names,pop.nind))
+ ## pop is kept as character; treatment and conversion to a factor belongs to the constructor
+ ## (otherwise there is potential for inconsistencies across different import functions
+ pop <- as.character(rep(pop.names,pop.nind))
## pass X to df2genind
res <- df2genind(X=X, ncode=3, pop=pop, ploidy=2, NA.char="000")
@@ -502,8 +526,8 @@ read.fstat <- function(file, quiet=FALSE){
txt <- .rmspaces(txt)
txt <- sapply(1:length(txt),function(i) unlist(strsplit(txt[i],"([[:space:]]+)|([[:blank:]]+)")) )
X <- t(txt)
- pop <- factor(X[,1])
- if(length(levels(pop)) == 1 ) pop <- NULL
+ pop <- as.character(X[,1])
+ if(length(unique(pop)) == 1 ) pop <- NULL
X <- X[,-1]
colnames(X) <- loc.names
@@ -514,7 +538,7 @@ read.fstat <- function(file, quiet=FALSE){
X[X %in% allNAs] <- NA.char
## call df2genind
- res <- df2genind(X=X,pop=pop, ploidy=2, ncode=ncode, NA.char=NA.char)
+ res <- df2genind(X=X, pop=pop, ploidy=2, ncode=ncode, NA.char=NA.char)
res at call <- call
if(!quiet) cat("\n...done.\n\n")
@@ -655,7 +679,7 @@ read.genepop <- function(file, ncode=2L, quiet=FALSE){
if(!all(unique(nchar(X))==(ncode*2))) stop(paste("some alleles are not encoded with", ncode,
"characters\nCheck 'ncode' argument"))
- res <- df2genind(X=X,pop=pop, ploidy=2, ncode=ncode, NA.char=NA.char)
+ res <- df2genind(X=X, pop=as.character(pop), ploidy=2, ncode=ncode, NA.char=NA.char)
res at call <- prevcall
if(!quiet) cat("\n...done.\n\n")
@@ -745,22 +769,22 @@ read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
## required questions
if(is.null(n.ind)){
cat("\n How many genotypes are there? ")
- n.ind <- as.integer(readLines(n = 1))
+ n.ind <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(n.loc)){
cat("\n How many markers are there? ")
- n.loc <- as.integer(readLines(n = 1))
+ n.loc <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(col.lab)){
cat("\n Which column contains labels for genotypes ('0' if absent)? ")
- col.lab <- as.integer(readLines(n = 1))
+ col.lab <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(col.pop)){
cat("\n Which column contains the population factor ('0' if absent)? ")
- col.pop <- as.integer(readLines(n = 1))
+ col.pop <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(col.others) & ask){
@@ -771,12 +795,12 @@ read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
if(is.null(row.marknames)){
cat("\n Which row contains the marker names ('0' if absent)? ")
- row.marknames <- as.integer(readLines(n = 1))
+ row.marknames <- as.integer(readLines(con = getOption('adegenet.testcon'), n = 1))
}
if(is.null(onerowperind)){
cat("\n Are genotypes coded by a single row (y/n)? ")
- onerowperind <- toupper(readLines(n = 1))
+ onerowperind <- toupper(readLines(con = getOption('adegenet.testcon'), n = 1))
if(onerowperind == "Y") {
onerowperind <- TRUE
} else {
@@ -786,7 +810,7 @@ read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
if(is.null(NA.char)){
cat("\n What is the code for missing data (default is '-9')? ")
- NA.char <- as.character(readLines(n = 1))
+ NA.char <- as.character(readLines(con = getOption('adegenet.testcon'), n = 1))
}
## message to console
@@ -837,7 +861,7 @@ read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
## population factor
if(col.pop !=0) {
- pop <- factor(mat[, col.pop])
+ pop <- as.character(mat[, col.pop])
} else {
pop <- NULL
}
@@ -881,7 +905,7 @@ read.structure <- function(file, n.ind=NULL, n.loc=NULL, onerowperind=NULL,
rownames(X) <- ind.names
colnames(X) <- loc.names
- res <- df2genind(X=X,pop=pop, ploidy=2,sep=sep,ncode=ncode)
+ res <- df2genind(X=X, pop=pop, ploidy=2, sep=sep, ncode=ncode)
res at call <- match.call()
@@ -1073,7 +1097,7 @@ import2genind <- function(file, quiet=FALSE, ...){
#' @export read.snp
#'
read.snp <- function(file, quiet=FALSE, chunkSize=1000,
- parallel=require("parallel"), n.cores=NULL, ...){
+ parallel=FALSE, n.cores=NULL, ...){
ext <- .readExt(file)
ext <- toupper(ext)
if(ext != "SNP") warning("wrong file extension - '.snp' expected")
@@ -1152,7 +1176,7 @@ read.snp <- function(file, quiet=FALSE, chunkSize=1000,
temp <- strsplit(txt[ID.INDIV+1], "")
temp <- lapply(temp, function(e) suppressWarnings(as.integer(e)))
if(parallel){
- res <- c(res, mclapply(temp, function(e) new("SNPbin", e),
+ res <- c(res, parallel::mclapply(temp, function(e) new("SNPbin", e),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE) )
} else {
res <- c(res, lapply(temp, function(e) new("SNPbin", e)) )
@@ -1238,7 +1262,7 @@ read.snp <- function(file, quiet=FALSE, chunkSize=1000,
#' provided, a list of two components is returned, containing chromosome and
#' position information.
#'
-extract.PLINKmap <- function(file, x=NULL){
+extract.PLINKmap <- function(file, x = NULL){
## CHECK EXTENSION ##
ext <- .readExt(file)
ext <- toupper(ext)
@@ -1247,28 +1271,33 @@ extract.PLINKmap <- function(file, x=NULL){
## READ FILE ##
## find nb of columns
- txt <- scan(file,what="character",sep="\n",quiet=TRUE, nlines=1)
+ txt <- scan(file, what = "character", sep = "\n", quiet = TRUE, nlines = 1)
nb.col <- length( unlist(strsplit(txt,"[[:blank:]]+")))
## read file
- txt <- scan(file,what="character",sep="\t",quiet=TRUE)
+ txt <- scan(file, what = "character", sep= "\n", quiet = TRUE)
+ txt <- unlist(strsplit(as.vector(txt), split = "[[:blank:]]"))
txt <- matrix(txt, ncol=4, byrow=TRUE)
## EXTRACT INFO AND RETURN OBJECT ##
## return a genlight
- if(!is.null(x)){
- ## match data
- ord <- match(locNames(x), txt[,2]) # check that it is the 2nd column
+ if (!is.null(x)) {
if(!inherits(x, "genlight")) stop("x is not a genlight object")
- other(x)$chromosome <- factor(txt[ord,1])
- other(x)$position <- as.integer(txt[ord,4])
+
+ ## match data: we need remove the potential alleles added to locus names
+ marker_id <- sub("_.*$", "", locNames(x))
+ ord <- match(marker_id, txt[,2])
+
+ chromosome(x) <- factor(txt[ord, 1])
+ position(x) <- as.integer(txt[ord, 4])
return(x)
}
## return a list
- res <- list(chromosome=factor(txt[ord,1]), position=as.integer(txt[ord,4]))
+ res <- list(chromosome = factor(txt[ord, 1]),
+ position = as.integer(txt[ord, 4]))
return(res)
} # end extract.PLINKmap
@@ -1306,7 +1335,7 @@ extract.PLINKmap <- function(file, x=NULL){
#' Data need to be exported from PLINK using the option "--recodeA" (and NOT
#' "--recodeAD"). The PLINK command should therefore look like: \code{plink
#' --file data --recodeA}. For more information on this topic, please look at
-#' this webpage: \url{http://pngu.mgh.harvard.edu/~purcell/plink/dataman.shtml}
+#' this webpage: \url{http://zzz.bwh.harvard.edu/plink/}
#'
#' @aliases read.PLINK read.plink
#' @param file for \code{read.PLINK} a character string giving the path to the
@@ -1413,7 +1442,7 @@ read.PLINK <- function(file, map.file=NULL, quiet=FALSE, chunkSize=1000,
txt <- lapply(txt, function(e) suppressWarnings(as.integer(e[-(1:6)])))
if(parallel){
- res <- c(res, mclapply(txt, function(e) new("SNPbin", snp=e, ploidy=2L),
+ res <- c(res, parallel::mclapply(txt, function(e) new("SNPbin", snp=e, ploidy=2L),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE) )
} else {
res <- c(res, lapply(txt, function(e) new("SNPbin", snp=e, ploidy=2L)) )
@@ -1524,7 +1553,7 @@ fasta2genlight <- function(file, quiet=FALSE, chunkSize=1000, saveNbAlleles=FALS
IND.LAB <- c(IND.LAB, sub(">","",txt[grep("^>", txt)])) # find individuals' labels
txt <- split(txt, rep(1:nb.ind, each=LINES.PER.IND)) # split per individuals
if(parallel){
- txt <- mclapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split=""),
+ txt <- parallel::mclapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split=""),
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE) # each genome -> one vector
} else {
txt <- lapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split="")) # each genome -> one vector
@@ -1582,7 +1611,7 @@ fasta2genlight <- function(file, quiet=FALSE, chunkSize=1000, saveNbAlleles=FALS
nb.ind <- length(grep("^>", txt))
txt <- split(txt, rep(1:nb.ind, each=LINES.PER.IND)) # split per individuals
if(parallel){
- txt <- mclapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split="")[[1]][snp.posi],
+ txt <- parallel::mclapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split="")[[1]][snp.posi],
mc.cores=n.cores, mc.silent=TRUE, mc.cleanup=TRUE, mc.preschedule=FALSE) # each genome -> one SNP vector
} else {
txt <- lapply(txt, function(e) strsplit(paste(e[-1], collapse=""), split="")[[1]][snp.posi]) # each genome -> one SNP vector
@@ -1627,9 +1656,11 @@ fasta2genlight <- function(file, quiet=FALSE, chunkSize=1000, saveNbAlleles=FALS
fasta2DNAbin <- function(file, quiet=FALSE, chunkSize=10, snpOnly=FALSE){
## HANDLE ARGUMENTS ##
- ext <- .readExt(file)
- ext <- toupper(ext)
- if(!ext %in% c("FASTA", "FA", "FAS")) warning("wrong file extension - '.fasta', '.fa' or '.fas' expected")
+ if (!is(file, "connection")) {
+ ext <- .readExt(file)
+ ext <- toupper(ext)
+ if(!ext %in% c("FASTA", "FA", "FAS")) warning("wrong file extension - '.fasta', '.fa' or '.fas' expected")
+ }
if(!quiet) cat("\n Converting FASTA alignment into a DNAbin object... \n\n")
diff --git a/R/inbreeding.R b/R/inbreeding.R
index ee97954..e41480d 100644
--- a/R/inbreeding.R
+++ b/R/inbreeding.R
@@ -10,20 +10,20 @@ inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("sample","functio
## if(x$ploidy != 2) stop("this inbreeding coefficient is designed for diploid genotypes only")
PLO <- ploidy(x)
+ LOC <- locFac(x)
if(!is.null(pop)) pop(x) <- pop
- if(is.null(x at pop) && is.null(pop)) {
+ if(is.null(pop(x)) && is.null(pop)) {
pop(x) <- factor(rep(1, nInd(x)))
}
- ## COMPUTATIONS ##
+ ## COMPUTATIONS ##
## get allele frequencies and \sum p_i^2 by pop and loc ##
## (generalized to any ploidy) ##
- ## tabfreq2 <- (makefreq(x = genind2genpop(x, quiet = TRUE), quiet=TRUE, truenames=truenames)$tab) ^2
## For genpop objects, a constant ploidy is assumed/needed. This kludge will do for now.
tabfreq2 <- tab(genind2genpop(x, quiet = TRUE), freq=TRUE) ^ PLO[1]
- sumpi2 <- t(apply(tabfreq2, 1, tapply, x$loc.fac, sum))
+ sumpi2 <- t(apply(tabfreq2, 1, tapply, LOC, sum))
## function to check a 1-locus genotype for homozigosity
## returns 1 if homoz, 0 otherwise
@@ -36,22 +36,11 @@ inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("sample","functio
}
## get the table of binary hetero/homo data
- if (truenames) {
- X <- tab(x)
- } else
- X <- tab(x)
-
- homotab <- t(apply(X, 1, tapply, x at loc.fac, f1))
-
-
+ X <- tab(x)
+ homotab <- t(apply(X, 1, tapply, LOC, f1))
+
## get pi2 for the appropriate pop
- if(truenames){
- popx <- pop(x)
- } else {
- popx <- x$pop
- }
-
- popx <- as.character(popx)
+ popx <- as.character(pop(x))
tabpi2 <- sumpi2[popx, , drop=FALSE]
@@ -63,13 +52,13 @@ inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("sample","functio
myEnv <- new.env()
assign("x", x, envir=myEnv)
assign("sumpi2", sumpi2, envir=myEnv)
- res <- function(F) {
+ res <- function(Fest) { # F estimate
## cat("\nx used:\n") # debugging
## print(x)
## cat("\nsumpi2 used:\n") # debugging
## print(sumpi2)
-
- return(exp(sum(log( x*(F+(1-F)*sumpi2) + (1-x)*(1-(F+(1-F)*sumpi2)) ) ,na.rm=TRUE)))
+ phom <- Fest + (1-Fest)*sumpi2
+ return(exp(sum(log(x*phom + (1-x)*(1-phom)), na.rm=TRUE)))
}
environment(res) <- myEnv
@@ -79,23 +68,17 @@ inbreeding <- function(x, pop=NULL, truenames=TRUE, res.type=c("sample","functio
## get likelihood functions for all individuals
res <- lapply(1:nrow(homotab), function(i) LIK(homotab[i,], tabpi2[i,]) )
- res <- lapply(res, Vectorize)
-
- ## name output
- if(truenames) {
- names(res) <- indNames(x)
- } else {
- names(res) <- names(x$tab)
- }
+ res <- setNames(lapply(res, Vectorize), indNames(x))
## IF WE RETURN FUNCTIONS ##
- if(res.type=="function"){
- return(res)
+ if (res.type=="function"){
+ return(res)
}
+ ## IF WE RETURN MLE ##
if (res.type == "estimate"){
- opfun <- function(x, ...) optimize(x, ...)[[1]]
+ opfun <- function(x, ...) optimize(x, ...)[[1]]
funval <- numeric(1)
res <- vapply(res, FUN = opfun , FUN.VALUE = funval, interval = c(0, 1),
maximum = TRUE, tol = .Machine$double.eps^0.75)
diff --git a/R/monmonier.R b/R/monmonier.R
index f5d8d29..596fdba 100644
--- a/R/monmonier.R
+++ b/R/monmonier.R
@@ -60,7 +60,7 @@ if(scanthres){
abline(h=Dlim,lty=2)
mtext("Dashed line indicates present threshold")
cat("Indicate the threshold (\'d\' for default): ")
- temp <- as.character(readLines(n = 1))
+ temp <- as.character(readLines(con = getOption('adegenet.testcon'), n = 1))
if(toupper(temp)!="D") { Dlim <- as.numeric(temp) }
}
@@ -616,7 +616,7 @@ if(scanthres){
abline(h=Dlim,lty=2)
mtext("Dashed line indicates present threshold")
cat("Indicate the threshold (\'d\' for default): ")
- temp <- as.character(readLines(n = 1))
+ temp <- as.character(readLines(con = getOption('adegenet.testcon'), n = 1))
if(toupper(temp)!="D") { Dlim <- as.numeric(temp) }
}
diff --git a/R/seqTrack.R b/R/seqTrack.R
index ffb9316..6b63676 100644
--- a/R/seqTrack.R
+++ b/R/seqTrack.R
@@ -620,7 +620,7 @@ plot.seqTrack <- function(x, y=NULL, col.pal=redpal, ...){
## if(!exists("edmondsOptimumBranching")) {
## stop("edmondsOptimumBranching does not exist; \nmake sure to use the latest Bioconductor (not CRAN) version of RBGL")
## cat("\nWould you like to try and install latest version of RBGL (needs internet connection)\n y/n: ")
-## ans <- tolower(as.character(readLines(n = 1)))
+## ans <- tolower(as.character(readLines(con = getOption('adegenet.testcon'), n = 1)))
## if(ans=="y"){
## source("http://bioconductor.org/biocLite.R")
## biocLite("RBGL")
diff --git a/R/showmekittens.R b/R/showmekittens.R
new file mode 100644
index 0000000..7e11df6
--- /dev/null
+++ b/R/showmekittens.R
@@ -0,0 +1,45 @@
+#' When you need a break...
+#'
+#' Genetic data analysis can be a harsh, tiring, daunting task.
+#' Sometimes, a mere break will not cut it.
+#' Sometimes, you need a kitten.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @details
+#'
+#' Please send us more! Either pull request or submit an issue with a URL (use
+#' \code{adegenetIssues()}).
+#'
+#'
+#' @param x the name or index of the video to display; if NULL, a random video is chosen
+#'
+#' @param list a logical indicating if the list of available videos should be displayed
+#'
+showmekittens <- function(x = NULL, list = FALSE){
+ ## 'pool' is a named character vector of video URLs
+ pool <- c(capucine = "http://www.youtube.com/watch?v=KIePsbJSS04",
+ vacuum = "https://www.youtube.com/watch?v=uiyKVWqxXWM")
+
+ ## either we return the list of videos, or we show one
+ if (list) {
+ return(pool)
+ }
+
+ if (is.null(x)) {
+ x <- sample(seq_along(pool), 1L)
+ }
+
+ ## check that x is okay
+ if (is.numeric(x) && (x < 1 || x > length(pool))) {
+ stop(sprintf("Video index (%d) is wrong; there are currently %d videos in the list", x, length(pool)))
+ }
+
+ if (is.character(x) && !x %in% names(pool)) {
+ stop(sprintf("Video name (%s) is not in the list; use the option 'list=TRUE' to see available videos.", x))
+ }
+
+ browseURL(pool[x])
+}
diff --git a/R/snapclust.R b/R/snapclust.R
new file mode 100644
index 0000000..2d9b100
--- /dev/null
+++ b/R/snapclust.R
@@ -0,0 +1,465 @@
+#' Maximum-likelihood genetic clustering using EM algorithm
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com} and Marie-Pauline
+#' Beugin
+#'
+#' @export
+#'
+#' @rdname snapclust
+#'
+#' @param x a \linkS4class{genind} object
+#'
+#' @param k the number of clusters to look for
+#'
+#' @param pop.ini parameter indicating how the initial group membership should
+#' be found. If \code{NULL}, groups are chosen at random, and the algorithm
+#' will be run \code{n.start times}. If "kmeans", then the function
+#' \code{find.clusters} is used to define initial groups using the K-means
+#' algorithm. If "ward", then the function \code{find.clusters} is used to
+#' define initial groups using the Ward algorithm. Alternatively, a factor
+#' defining the initial cluster configuration can be provided.
+#'
+#' @param max.iter the maximum number of iteration of the EM algorithm
+#'
+#' @param n.start the number of times the EM algorithm is run, each time with
+#' different random starting conditions
+#'
+#' @param n.start.kmeans the number of times the K-means algorithm is run to
+#' define the starting point of the ML-EM algorithm, each time with
+#' different random starting conditions
+#'
+#' @param hybrids a logical indicating if hybrids should be modelled
+#' explicitely; this is currently implemented for 2 groups only.
+#'
+#' @param dim.ini the number of PCA axes to retain in the dimension reduction
+#' step for \code{\link{find.clusters}}, if this method is used to define
+#' initial group memberships (see argument \code{pop.ini}).
+#'
+#' @param hybrid.coef a vector of hybridization coefficients, defining the
+#' proportion of hybrid gene pool coming from the first parental population;
+#' this is symmetrized around 0.5, so that e.g. c(0.25, 0.5) will be
+#' converted to c(0.25, 0.5, 0.75)
+#'
+#' @param parent.lab a vector of 2 character strings used to label the two
+#' parental populations; only used if hybrids are detected (see argument
+#' \code{hybrids})
+#'
+#' @param ... further arguments passed on to \code{\link{find.clusters}}
+#'
+#' @return
+#'
+#' The function \code{snapclust} returns a list with the following
+#' components:
+#' \itemize{
+#'
+#' \item \code{$group} a factor indicating the maximum-likelihood assignment of
+#' individuals to groups; if identified, hybrids are labelled after
+#' hybridization coefficients, e.g. 0.5_A - 0.5_B for F1, 0.75_A - 0.25_B for
+#' backcross F1 / A, etc.
+#'
+#' \item \code{$ll}: the log-likelihood of the model
+#'
+#' \item \code{$proba}: a matrix of group membership probabilities, with
+#' individuals in rows and groups in columns; each value correspond to the
+#' probability that a given individual genotype was generated under a given
+#' group, under Hardy-Weinberg hypotheses.
+#'
+#' \item \code{$converged} a logical indicating if the algorithm converged; if
+#' FALSE, it is doubtful that the result is an actual Maximum Likelihood
+#' estimate.
+#'
+#' \item \code{$n.iter} an integer indicating the number of iterations the EM
+#' algorithm was run for.
+#'
+#' }
+#'
+#' @examples
+#' \dontrun{
+#' data(microbov)
+#'
+#' ## try function using k-means initialization
+#' grp.ini <- find.clusters(microbov, n.clust=15, n.pca=150)
+#'
+#' ## run EM algo
+#' res <- snapclust(microbov, 15, pop.ini = grp.ini$grp)
+#' names(res)
+#' res$converged
+#' res$n.iter
+#'
+#' ## plot result
+#' compoplot(res)
+#'
+#' ## flag potential hybrids
+#' to.flag <- apply(res$proba,1,max)<.9
+#' compoplot(res, subset=to.flag, show.lab=TRUE,
+#' posi="bottomleft", bg="white")
+#'
+#'
+#' ## Simulate hybrids F1
+#' zebu <- microbov[pop="Zebu"]
+#' salers <- microbov[pop="Salers"]
+#' hyb <- hybridize(zebu, salers, n=30)
+#' x <- repool(zebu, salers, hyb)
+#'
+#' ## method without hybrids
+#' res.no.hyb <- snapclust(x, k=2, hybrids=FALSE)
+#' compoplot(res.no.hyb, col.pal=spectral, n.col=2)
+#'
+#' ## method with hybrids
+#' res.hyb <- snapclust(x, k=2, hybrids=TRUE)
+#' compoplot(res.hyb, col.pal =
+#' hybridpal(col.pal = spectral), n.col = 2)
+#'
+#'
+#' ## Simulate hybrids backcross (F1 / parental)
+#' f1.zebu <- hybridize(hyb, zebu, 20, pop = "f1.zebu")
+#' f1.salers <- hybridize(hyb, salers, 25, pop = "f1.salers")
+#' y <- repool(x, f1.zebu, f1.salers)
+#'
+#' ## method without hybrids
+#' res2.no.hyb <- snapclust(y, k = 2, hybrids = FALSE)
+#' compoplot(res2.no.hyb, col.pal = hybridpal(), n.col = 2)
+#'
+#' ## method with hybrids F1 only
+#' res2.hyb <- snapclust(y, k = 2, hybrids = TRUE)
+#' compoplot(res2.hyb, col.pal = hybridpal(), n.col = 2)
+#'
+#' ## method with back-cross
+#' res2.back <- snapclust(y, k = 2, hybrids = TRUE, hybrid.coef = c(.25,.5))
+#' compoplot(res2.hyb, col.pal = hybridpal(), n.col = 2)
+#'
+#' }
+
+snapclust <- function(x, k, pop.ini = "ward", max.iter = 100, n.start = 10,
+ n.start.kmeans = 50,
+ hybrids = FALSE, dim.ini = 100,
+ hybrid.coef = NULL, parent.lab = c('A', 'B'), ...) {
+ ## This function uses the EM algorithm to find ML group assignment of a set
+ ## of genotypes stored in a genind object into 'k' clusters. We need an
+ ## initial cluster definition to start with. The rest of the algorithm
+ ## consists of:
+
+ ## i) compute the matrix of allele frequencies
+
+ ## ii) compute the likelihood of each genotype for each group
+
+ ## iii) assign genotypes to the group for which they have the highest
+ ## likelihood
+
+ ## iv) go back to i) until convergence
+
+
+ ## Disable multiple starts if the initial condition is not random
+ use.random.start <- is.null(pop.ini)
+ if (!use.random.start) {
+ n.start <- 1L
+ }
+
+ if (n.start < 1L) {
+ stop(sprintf(
+ "n.start is less than 1 (%d); using n.start=1", n.start))
+ }
+
+ if (hybrids && k > 2) {
+ warning(sprintf(
+ "forcing k=2 for hybrid mode (requested k is %d)", k))
+ k <- 2
+ }
+
+
+ ## Handle hybrid coefficients; these values reflect the contribution of the
+ ## first parental population to the allele frequencies of the hybrid
+ ## group. For instance, a value of 0.75 indicates that 'a' contributes to
+ ## 75%, and 'b' 25% of the allele frequencies of the hybrid - a typical
+ ## backcross F1 / a.
+
+ if (hybrids) {
+ if (is.null(hybrid.coef)) {
+ hybrid.coef <- 0.5
+ }
+ hybrid.coef <- .tidy.hybrid.coef(hybrid.coef)
+ }
+
+
+ ## Initialisation using 'find.clusters'
+ if (!is.null(pop.ini)) {
+ if (tolower(pop.ini)[1] %in% c("kmeans", "k-means")) {
+ pop.ini <- find.clusters(x, n.clust = k, n.pca = dim.ini,
+ n.start = n.start.kmeans,
+ method = "kmeans", ...)$grp
+ } else if (tolower(pop.ini)[1] %in% c("ward")) {
+ pop.ini <- find.clusters(x, n.clust = k, n.pca = dim.ini,
+ method = "ward", ...)$grp
+ }
+ }
+
+ ## There is one run of the EM algo for each of the n.start random initial
+ ## conditions.
+ ll <- -Inf # this will be the total loglike
+
+ for (i in seq_len(n.start)) {
+
+ ## Set initial conditions: if initial pop is NULL, we create a random
+ ## group definition (each clusters have same probability)
+ if (use.random.start) {
+ pop.ini <- sample(seq_len(k), nInd(x), replace=TRUE)
+ }
+
+ ## process initial population, store levels
+ pop.ini <- factor(pop.ini)
+ lev.ini <- levels(pop.ini)[1:k] # k+1 would be hybrids
+
+ ## ensure 'pop.ini' matches 'k'
+ if (! (length(levels(pop.ini)) %in% c(k, k + length(hybrid.coef))) ) {
+ stop("pop.ini does not have k clusters")
+ }
+
+ ## initialisation
+ group <- factor(as.integer(pop.ini)) # set levels to 1:k (or k+1)
+ genotypes <- tab(x)
+ n.loc <- nLoc(x)
+ counter <- 0L
+ converged <- FALSE
+
+
+ ## This is the actual EM algorithm
+
+ while(!converged && counter<=max.iter) {
+
+ ## get table of allele frequencies (columns) by population (rows);
+ ## these are stored as 'pop.freq'; note that it will include extra
+ ## rows for different types of hybrids too.
+
+ if (hybrids) {
+ pop(x) <- group
+ id.parents <- .find.parents(x)
+ x.parents <- x[id.parents]
+ pop.freq <- tab(genind2genpop(x.parents, quiet=TRUE),
+ freq=TRUE)
+ pop.freq <- rbind(pop.freq, # parents
+ .find.freq.hyb(pop.freq, hybrid.coef)) # hybrids
+ } else {
+ pop.freq <- tab(genind2genpop(x, pop=group, quiet=TRUE),
+ freq=TRUE)
+ }
+
+ ## ensures no allele frequency is exactly zero
+ pop.freq <- .tidy.pop.freq(pop.freq, locFac(x))
+
+ ## get likelihoods of genotypes in every pop
+ ll.mat <- apply(genotypes, 1, .ll.genotype, pop.freq, n.loc)
+
+ ## assign individuals to most likely cluster
+ previous.group <- group
+ group <- apply(ll.mat, 2, which.max)
+
+ ## check convergence
+ ## converged <- all(group == previous.group)
+ old.ll <- .global.ll(previous.group, ll.mat)
+ new.ll <- .global.ll(group, ll.mat)
+ if (!is.finite(new.ll)) {
+ ## stop(sprintf("log-likelihood at iteration %d is not finite (%f)",
+ ## counter, new.ll))
+ }
+ converged <- abs(old.ll - new.ll) < 1e-14
+ counter <- counter + 1L
+
+ }
+
+ ## ## store the best run so far
+ ## new.ll <- .global.ll(group, ll.mat)
+
+ if (new.ll > ll || i == 1L) {
+ ## store results
+ ll <- new.ll
+ out <- list(group = group, ll = ll)
+
+ ## group membership probability
+ out$proba <- prop.table(t(exp(ll.mat)), 1)
+ out$converged <- converged
+ out$n.iter <- counter
+ }
+ } # end of the for loop
+
+ ## restore labels of groups
+ out$group <- factor(out$group)
+ if (hybrids) {
+ if (!is.null(parent.lab)) {
+ lev.ini <- parent.lab
+ }
+ hybrid.labels <- paste0(hybrid.coef, "_", lev.ini[1], "-",
+ 1 - hybrid.coef, "_", lev.ini[2])
+ lev.ini <- c(lev.ini, hybrid.labels)
+ }
+ levels(out$group) <- lev.ini
+ colnames(out$proba) <- lev.ini
+
+
+ ## compute the number of parameters; it is defined as the number of 'free'
+ ## allele frequencies, multiplied by the number of groups
+
+ out$n.param <- (ncol(genotypes) - n.loc) * length(lev.ini)
+
+ class(out) <- c("snapclust", "list")
+ return(out)
+}
+
+
+
+
+
+
+
+
+
+
+
+## Non-exported function which computes the log-likelihood of a genotype in
+## every population. For now only works for diploid individuals. 'x' is a vector
+## of allele counts; 'pop.freq' is a matrix of group allele frequencies, with
+## groups in rows and alleles in columns.
+
+## TODO: extend this to various ploidy levels, possibly optimizing procedures
+## for haploids.
+
+.ll.genotype <- function(x, pop.freq, n.loc){
+ ## homozygote (diploid)
+ ## p(AA) = f(A)^2 for each locus
+ ll.homoz.one.indiv <- function(f) {
+ sum(log(f[x == 2L]), na.rm = TRUE) * 2
+ }
+
+ ll.homoz <- apply(pop.freq, 1, ll.homoz.one.indiv)
+
+ ## heterozygote (diploid, expl with 2 loci)
+ ## p(Aa)p(Bb) = 2^n.loc * f(A)f(a) f(B)f(b)
+ ll.hetero.one.indiv <- function(f) {
+ sum(log(f[x == 1L]), na.rm = TRUE) + n.loc * log(2)
+ }
+
+ ll.heteroz <- apply(pop.freq, 1, ll.hetero.one.indiv)
+
+ return(ll.homoz + ll.heteroz)
+}
+
+
+
+
+
+
+## Non-exported function computing the total log-likelihood of the model given a vector of group
+## assignments and a table of ll of genotypes in each group
+
+.global.ll <- function(group, ll){
+ sum(t(ll)[cbind(seq_along(group), as.integer(group))], na.rm=TRUE)
+}
+
+
+
+
+
+## Non-exported function making a tidy vector of weights for allele frequencies
+## of parental populations. It ensures that given any input vector of weights
+## 'w' defining the types of hybrids, the output has the following properties:
+
+## - strictly on ]0,1[
+
+## - symmetric around 0.5, e.g. c(.25, .5) gives c(.25, .5, .75)
+
+## - sorted by decreasing values (i.e. hybrid types are sorted by decreasing
+## proximity to the first parental population.
+
+.tidy.hybrid.coef <- function(w) {
+ w <- w[w > 0 & w < 1]
+ w <- sort(unique(round(c(w, 1-w), 4)), decreasing = TRUE)
+ w
+}
+
+
+
+
+
+## Non-exported function determining vectors of allele frequencies in hybrids
+## from 2 parental populations. Different types of hybrids are determined by
+## weights given to the allele frequencies of the parental populations. Only one
+## such value is provided and taken to be the weight of the 1st parental
+## population; the complementary frequency is derived for the second parental
+## population.
+
+## Parameters are:
+
+## - x: matrix of allele frequencies for population 'a' (first row) and 'b'
+## (second row), where allele are in columns.
+
+
+## - w: a vector of weights for 'a' and 'b', each value determining a type of
+## hybrid. For instance, 0.5 is for F1, 0.25 for backcrosses F1/parental, 0.125
+## for 2nd backcross F1/parental, etc.
+
+## The output is a matrix of allele frequencies with hybrid types in rows and
+## alleles in columns.
+
+.find.freq.hyb <- function(x, w) {
+ out <- cbind(w, 1-w) %*% x
+ rownames(out) <- w
+ out
+}
+
+
+
+
+## Non-exported function trying to find the two parental populations in a genind
+## object containing 'k' clusters. The parental populations are defined as the
+## two most distant clusters. The other clusters are deemed to be various types
+## of hybrids. The output is a vector of indices identifying the individuals
+## from the parental populations.
+
+.find.parents <- function(x) {
+ ## matrix of pairwise distances between clusters, using Nei's distance
+ D <- as.matrix(dist.genpop(genind2genpop(x, quiet = TRUE), method = 1))
+ parents <- which(abs(max(D)-D) < 1e-14, TRUE)[1,]
+ out <- which(as.integer(pop(x)) %in% parents)
+ out
+}
+
+
+
+
+
+## Non-exported function enforcing a minimum allele frequency in a table of
+## allele frequency. As we are not accounting for the uncertainty in allele
+## frequencies, we need to allow for genotypes to be generated from a population
+## which does not have the genotype's allele represented, even if this is at a
+## low probability. The transformation is ad-hoc, and has the form:
+##
+## g(f_i) = (a + f_i / \sum(a + f_i))
+
+## where f_i is the i-th frequency in a given locus. However, this ensures that
+## the output has two important properties:
+
+## - it sums to 1
+## - it contains no zero
+
+## By default, we set 'a' to 0.01.
+
+## Function inputs are:
+
+## - 'pop.freq': matrix of allele frequencies, with groups in rows and alleles in
+## columns
+
+## - 'loc.fac': a factor indicating which alleles belong to which locus, as
+## returned by 'locFac([a genind])'
+
+.tidy.pop.freq <- function(pop.freq, loc.fac) {
+ g <- function(f, a = .01) {
+ (a + f) / sum(a + f)
+ }
+
+ out <- matrix(unlist(apply(pop.freq, 1, tapply, loc.fac, g),
+ use.names = FALSE),
+ byrow=TRUE, nrow=nrow(pop.freq))
+ dimnames(out) <- dimnames(pop.freq)
+ return(out)
+}
diff --git a/R/snapclust.choose.k.R b/R/snapclust.choose.k.R
new file mode 100644
index 0000000..e8c5a98
--- /dev/null
+++ b/R/snapclust.choose.k.R
@@ -0,0 +1,107 @@
+#' Choose the number of clusters for snapclust using BIC
+#'
+#' Do not use. We work on that stuff. Contact us if interested.
+#'
+#' @author Thibaut Jombart \email{thibautjombart@@gmail.com}
+#'
+#' @export
+#'
+#' @seealso \code{\link{snapclust}} to generate individual clustering solutions,
+#' and \code{\link{BIC.snapclust}} for computing BIC for \code{snapclust}
+#' objects.
+#'
+#' @param max An integer indicating the maximum number of clusters to seek;
+#' \code{\link{snapclust}} will be run for all k from 2 to max.
+#'
+#' @param IC A function computing the information criterion for
+#' \code{\link{snapclust}} objects. Available statistics are
+#' \code{AIC} (default), \code{AICc}, and \code{BIC}.
+#'
+#' @param IC.only A logical (TRUE by default) indicating if IC values only
+#' should be returned; if \code{FALSE}, full \code{snapclust} objects are
+#' returned.
+#'
+#' @param ... Arguments passed to \code{\link{snapclust}}.
+#'
+snapclust.choose.k <- function(max, ..., IC = AIC, IC.only = TRUE) {
+
+ ## This function is a glorified for loop which runs snapclust for several
+ ## values of 'k', from 2 to 'max'. It returns information criterion (AIC or
+ ## BIC), and can also return the full snapclust objects if needed. For
+ ## k=1, AIC and BIC are computed via an internal (i.e. non-exported)
+ ## procedure.
+
+ max <- as.integer(max)
+ if (any(!is.finite(max))) {
+ stop("Values of k need to be finite.")
+ }
+ if (max < 2) {
+ stop("maximum number of clusters should be at least 2")
+ }
+ k.values <- 2:max
+
+ call.args <- list(...)
+ genind.posi <- match("genind", sapply(call.args, class))
+ if (is.na(genind.posi)) {
+ stop("No genind provided in '...'.")
+ }
+ names(call.args)[genind.posi] <- "x"
+
+
+ out.IC <- double(length(k.values))
+ out.objects <- list(length(k.values))
+
+ for (i in seq_along(k.values)) {
+ ## get clustering solution for 'k'
+ call.args$k <- k.values[i]
+ out.objects[[i]] <- do.call(snapclust, call.args)
+ }
+
+ names(out.objects) <- k.values
+ out.IC <- .compute.null.IC(call.args$x)
+ out.IC <- c(out.IC, vapply(out.objects, IC, double(1)))
+ names(out.IC) <- 1:max
+
+ if (IC.only) {
+ out <- out.IC
+ } else {
+ out <- list(AIC = out.IC, objects = out.objects)
+
+ ## names stat as appropriate
+ names(out)[1] <- deparse(substitute(IC))
+ }
+
+ return(out)
+}
+
+
+
+
+
+## Non-exported procedure to compute the BIC for k = 1
+
+## - 'x' is a genind object.
+## - 'IC' is either AIC or BIC
+
+.compute.null.IC <- function(x, IC = AIC) {
+ group <- rep(1L, nInd(x))
+ n.loc <- nLoc(x)
+ genotypes <- tab(x)
+ pop.freq <- tab(genind2genpop(x, pop = group, quiet = TRUE),
+ freq = TRUE)
+
+ ## browser()
+
+ ## get likelihoods of genotypes
+ ll.mat <- apply(genotypes, 1, .ll.genotype, pop.freq, n.loc)
+ ll.mat <- matrix(ll.mat, nrow = 1)
+
+ ll <- .global.ll(group, ll.mat)
+
+ ## make a fake snapclust object to get IC
+ fake <- list(ll = ll,
+ group = group,
+ n.param = ncol(tab(x)) - nLoc(x))
+ class(fake) <- "snapclust"
+ IC(fake)
+}
diff --git a/R/snpposi.R b/R/snpposi.R
index b62dc28..4bf7e49 100644
--- a/R/snpposi.R
+++ b/R/snpposi.R
@@ -83,11 +83,8 @@ snpposi.plot.integer <- function(x, genome.size, smooth=0.1, col="royalblue", al
## IF WE REPRESENT DENSITY PER CODON POSITION ##
if(codon){
## define base positions (1/2/3) ##
- fac <- rep(1:3, length=genome.size)
- if(start.at==2) fac <- c(2:3,fac)[1:genome.size]
- if(start.at==3) fac <- c(3,fac)[1:genome.size]
- fac <- factor(fac, levels=1:3)
- fac <- fac[x]
+ codon.posi <- ((2 + x) %% 3) + 1
+ fac <- factor(codon.posi, levels=1:3)
## make ggplot output ##
out <- ggplot(data.frame(x=x, codon=fac), aes(x=x)) + xlim(0, genome.size)
diff --git a/R/spca.R b/R/spca.R
index 4d5e280..822673e 100644
--- a/R/spca.R
+++ b/R/spca.R
@@ -1,136 +1,305 @@
+###############################################
+##
+## spatial Principal Components Analysis
+##
+## require ade4, spdep and eventually tripack
+##
+## generic functions were derived from
+## those of multispati class (ade4)
+##
+## T. Jombart (t.jombart at imperial.ac.uk)
+## 31 may 2007
##############################################
-#
-# spatial Principal Components Analysis
-#
-# require ade4, spdep and eventually tripack
-#
-# generic functions were derived from
-# those of multispati class (ade4)
-#
-# T. Jombart (t.jombart at imperial.ac.uk)
-# 31 may 2007
-##############################################
+
+spca <- function (...) UseMethod("spca")
+
################
-# spca genind
+## spca.default
################
-spca <- function(obj, xy=NULL, cn=NULL, matWeight=NULL,
- scale=FALSE,
- scannf=TRUE, nfposi=1, nfnega=1,
- type=NULL, ask=TRUE, plot.nb=TRUE, edit.nb=FALSE,
- truenames=TRUE, d1=NULL, d2=NULL, k=NULL, a=NULL, dmin=NULL){
-
- ## first checks
- if(!any(inherits(obj,c("genind","genpop")))) stop("obj must be a genind or genpop object.")
- invisible(validObject(obj))
- ## checkType(obj)
-
-
- ## handle xy coordinates
- if(is.null(xy) & (inherits(cn,"nb") & !inherits(cn,"listw")) ){
- xy <- attr(cn,"xy") # xy can be retrieved from a nb object (not from listw)
- }
- if(is.null(xy) & !is.null(obj$other$xy)) {
- xy <- obj$other$xy # xy from @other$xy if it exists
- }
- if(is.null(xy)) stop("xy coordinates are not provided")
- if(is.data.frame(xy)) {
- xy <- as.matrix(xy)
- }
- if(!is.matrix(xy)) stop("wrong 'xy' provided")
- if(ncol(xy) != 2) stop("xy does not have two columns.")
- if(nrow(xy) != nrow(obj at tab)) stop("obj at tab and xy must have the same row numbers.")
+spca.default <- function(x, ...) {
+ stop(sprintf("No spca method for object of class %s",
+ paste(class(x), collapse = " ")))
+}
- appel <- match.call()
- ## == To find the spatial weights ==
- resCN <- NULL
- ## connection network from matWeight
- if(!is.null(matWeight)){
- if(!is.matrix(matWeight)) stop("matWeight is not a matrix")
- if(!is.numeric(matWeight)) stop("matWeight is not numeric")
- if(nrow(matWeight) != ncol(matWeight)) stop("matWeight is not square")
- if(nrow(matWeight) != nrow(obj at tab)) stop("dimension of datWeight does not match genetic data")
- diag(matWeight) <- 0
- matWeight <- prop.table(matWeight, 1)
- resCN <- mat2listw(matWeight)
- resCN$style <- "W"
- }
+###############
+## spca.matrix
+###############
+spca.matrix <- function(x, xy = NULL, cn = NULL, matWeight = NULL,
+ center = TRUE, scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...) {
+
+ ## check type of x: only numeric values are acceptable
+
+ if (!is.numeric(x)) {
+ stop("Only matrices of numeric values are accepted.")
+ }
+
+
+ ## check and handle xy coordinates
+
+ if(is.null(xy) & (inherits(cn,"nb") & !inherits(cn,"listw")) ){
+ xy <- attr(cn,"xy") # xy can be retrieved from a nb object (not from listw)
+ }
+ if(is.null(xy)) {
+ stop("xy coordinates are not provided")
+ }
+ if(is.data.frame(xy)) {
+ xy <- as.matrix(xy)
+ }
+ if(!is.matrix(xy)) {
+ stop("provided 'xy' cannot be converted to matrix")
+ }
+ if(ncol(xy) != 2) {
+ stop("xy does not have two columns.")
+ }
+ if(nrow(xy) != nrow(x)) {
+ stop("x and xy must have the same row numbers.")
+ }
- ## connection network from cn argument
- if(is.null(resCN) & !is.null(cn)) {
- if(inherits(cn,"nb")) {
- if(!inherits(cn,"listw")){ # cn is a 'pure' nb object (i.e., nb but not listw)
- cn <- nb2listw(cn, style="W", zero.policy=TRUE)
- }
- resCN <- cn
- } else stop("cn does not have a recognized class")
- }
+ ## find the spatial weights
- ## connection network from xy coordinates
- if(is.null(resCN)) {
- resCN <- chooseCN(xy=xy, ask=ask, type=type, plot.nb=plot.nb, edit.nb=edit.nb,
- result.type="listw", d1=d1, d2=d2, k=k, a=a, dmin=dmin)
+ resCN <- NULL
+
+ ## connection network from matWeight
+
+ if (!is.null(matWeight)) {
+ if (!is.matrix(matWeight)) {
+ stop("matWeight is not a matrix")
+ }
+ if (!is.numeric(matWeight)) {
+ stop("matWeight is not numeric")
}
+ if (nrow(matWeight) != ncol(matWeight)) {
+ stop("matWeight is not square")
+ }
+ if (nrow(matWeight) != nrow(x)) {
+ stop("dimension of datWeight does not match data")
+ }
+ diag(matWeight) <- 0
+ matWeight <- prop.table(matWeight, 1)
+ resCN <- mat2listw(matWeight)
+ resCN$style <- "W"
- ## == spatial weights are done ==
+ }
+ ## connection network from cn argument
- ## handle NAs warning
- if(any(is.na(obj at tab))){
- warning("NAs in data are automatically replaced (to mean allele frequency)")
+ if(is.null(resCN) & !is.null(cn)) {
+ if(inherits(cn,"nb")) {
+ if(!inherits(cn,"listw")){ # cn is a 'pure' nb object (i.e., nb but not listw)
+ cn <- nb2listw(cn, style="W", zero.policy=TRUE)
+ }
+ resCN <- cn
+ } else {
+ stop("cn does not have a recognized class")
}
+ }
+
+
+ ## connection network from xy coordinates
+ if(is.null(resCN)) {
+ resCN <- chooseCN(xy=xy, ask=ask, type=type, plot.nb=plot.nb, edit.nb=edit.nb,
+ result.type="listw", d1=d1, d2=d2, k=k, a=a, dmin=dmin)
+ }
+
+
+ ## perform the analyses: basic PCA followed by multispati
+
+ x_pca <- ade4::dudi.pca(x, center = center, scale = scale, scannf = FALSE)
+
+ out <- ade4::multispati(dudi = x_pca, listw = resCN, scannf = scannf,
+ nfposi = nfposi, nfnega = nfnega)
+
+ nfposi <- out$nfposi
+ nfnega <- out$nfnega
+
+ out$tab <- x_pca$tab
+ out$xy <- xy
+ rownames(out$xy) <- rownames(out$li)
+ colnames(out$xy) <- c("x","y")
+
+ out$lw <- resCN
+
+ dots <- list(...)
+ if (!is.null(dots$call)) {
+ out$call <- dots$call
+ } else {
+ out$call <- match.call()
+ }
+
+ posaxes <- if (nfposi > 0) {1:nfposi} else NULL
+ negaxes <- if (nfnega > 0) {(length(out$eig)-nfnega+1):length(out$eig)} else NULL
+ keptaxes <- c(posaxes, negaxes)
- ## handle NAs, centring and scaling
- X <- scaleGen(obj, center=TRUE, scale=scale, NA.method="mean")
+ ## set names of different components
+ colnames(out$c1) <- paste("Axis",keptaxes)
+ colnames(out$li) <- paste("Axis",keptaxes)
+ colnames(out$ls) <- paste("Axis",keptaxes)
+ row.names(out$c1) <- colnames(x)
+ colnames(out$as) <- colnames(out$c1)
+ temp <- row.names(out$as)
+ row.names(out$as) <- paste("PCA", temp)
- ## perform analyses
- pcaX <- dudi.pca(X, center=FALSE, scale=FALSE, scannf=FALSE)
+ class(out) <- "spca"
- spcaX <- multispati(dudi=pcaX, listw=resCN, scannf=scannf, nfposi=nfposi, nfnega=nfnega)
+ return(out)
- nfposi <- spcaX$nfposi
- nfnega <- spcaX$nfnega
+}
- spcaX$xy <- xy
- rownames(spcaX$xy) <- rownames(spcaX$li)
- colnames(spcaX$xy) <- c("x","y")
- spcaX$lw <- resCN
- spcaX$call <- appel
- posaxes <- if(nfposi>0) {1:nfposi} else NULL
- negaxes <- if(nfnega>0) {(length(spcaX$eig)-nfnega+1):length(spcaX$eig)} else NULL
- keptaxes <- c(posaxes,negaxes)
- ## set names of different components
- colnames(spcaX$c1) <- paste("Axis",keptaxes)
- colnames(spcaX$li) <- paste("Axis",keptaxes)
- colnames(spcaX$ls) <- paste("Axis",keptaxes)
- row.names(spcaX$c1) <- colnames(X)
- colnames(spcaX$as) <- colnames(spcaX$c1)
- temp <- row.names(spcaX$as)
- row.names(spcaX$as) <- paste("PCA",temp)
- class(spcaX) <- "spca"
+###################
+## spca.data.frame
+###################
+
+spca.data.frame <- function(x, xy = NULL, cn = NULL, matWeight = NULL,
+ center = TRUE, scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...) {
+
+ call <- match.call()
+
+ spca(as.matrix(x), xy = xy, cn = cn, matWeight = matWeight, center = center,
+ cale = scale, scannf = scannf, nfposi = nfposi, nfnega = nfnega,
+ type = type, ask = ask, plot.nb = plot.nb, edit.nb = edit.nb,
+ truenames = truenames, d1 = d1, d2 = d2, k = k, a = a, dmin = dmin,
+ call = call, ...)
+}
- return(spcaX)
-} # end spca
+
+
+
+
+################
+## spca genind
+################
+
+spca.genind <- function(obj, xy = NULL, cn = NULL, matWeight = NULL,
+ scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...){
+
+ ## first checks
+
+ invisible(validObject(obj))
+
+
+ ## handle xy coordinates
+ if(is.null(xy) & !is.null(obj$other$xy)) {
+ xy <- obj$other$xy # xy from @other$xy if it exists
+ }
+
+
+
+ ## == spatial weights are done ==
+
+
+ ## handle NAs warning
+ if(any(is.na(obj at tab))){
+ warning("NAs in data are automatically replaced (to mean allele frequency)")
+ }
+
+ ## handle NAs, centring and scaling
+ X <- tab(obj, freq = TRUE, NA.method = "mean")
+
+ call <- match.call()
+
+ spca(X, xy = xy, cn = cn, matWeight = matWeight,
+ center = TRUE, scale = scale, scannf = scannf,
+ nfposi = nfposi, nfnega = nfnega,
+ type = type, ask = ask,
+ plot.nb = plot.nb, edit.nb = edit.nb,
+ truenames = truenames,
+ d1 = d1, d2 = d2, k = k,
+ a = a, dmin = dmin,
+ call = call, ...)
+
+} # end spca.genind
+
+
+
+
+
+
+################
+## spca genpop
+################
+
+spca.genpop <- function(obj, xy = NULL, cn = NULL, matWeight = NULL,
+ scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...){
+
+ ## first checks
+
+ invisible(validObject(obj))
+
+
+ ## handle xy coordinates
+ if(is.null(xy) & !is.null(obj$other$xy)) {
+ xy <- obj$other$xy # xy from @other$xy if it exists
+ }
+
+
+ ## handle NAs warning
+ if(any(is.na(obj at tab))){
+ warning("NAs in data are automatically replaced (to mean allele frequency)")
+ }
+
+ ## handle NAs, centring and scaling
+ X <- tab(obj, freq = TRUE, NA.method = "mean")
+
+ call <- match.call()
+
+ spca(X, xy = xy, cn = cn, matWeight = matWeight,
+ center = TRUE, scale = scale, scannf = scannf,
+ nfposi = nfposi, nfnega = nfnega,
+ type = type, ask = ask,
+ plot.nb = plot.nb, edit.nb = edit.nb,
+ truenames = truenames,
+ d1 = d1, d2 = d2, k = k,
+ a = a, dmin = dmin,
+ call = call, ...)
+
+} # end spca.genpop
+
######################
-# Function print.spca
+## Function print.spca
######################
+
print.spca <- function(x, ...){
cat("\t########################################\n")
cat("\t# spatial Principal Component Analysis #\n")
@@ -161,11 +330,18 @@ print.spca <- function(x, ...){
class(sumry) <- "table"
print(sumry)
cat("\n")
- sumry <- array("", c(4, 4), list(1:4, c("data.frame", "nrow", "ncol", "content")))
- sumry[1, ] <- c("$c1", nrow(x$c1), ncol(x$c1), "principal axes: scaled vectors of alleles loadings")
- sumry[2, ] <- c("$li", nrow(x$li), ncol(x$li), "principal components: coordinates of entities ('scores')")
- sumry[3, ] <- c("$ls", nrow(x$ls), ncol(x$ls), 'lag vector of principal components')
- sumry[4, ] <- c("$as", nrow(x$as), ncol(x$as), 'pca axes onto spca axes')
+ sumry <- array("", c(5, 4),
+ list(1:5, c("data.frame", "nrow", "ncol", "content")))
+ sumry[1, ] <- c("$tab", nrow(x$tab), ncol(x$tab),
+ "transformed data: optionally centred / scaled")
+ sumry[2, ] <- c("$c1", nrow(x$c1), ncol(x$c1),
+ "principal axes: scaled vectors of alleles loadings")
+ sumry[3, ] <- c("$li", nrow(x$li), ncol(x$li),
+ "principal components: coordinates of entities ('scores')")
+ sumry[4, ] <- c("$ls", nrow(x$ls), ncol(x$ls),
+ "lag vector of principal components")
+ sumry[5, ] <- c("$as", nrow(x$as), ncol(x$as),
+ "pca axes onto spca axes")
class(sumry) <- "table"
print(sumry)
@@ -184,7 +360,7 @@ print.spca <- function(x, ...){
########################
-# Function summary.spca
+## Function summary.spca
########################
summary.spca <- function (object, ..., printres=TRUE) {
if (!inherits(object, "spca"))stop("to be used with 'spca' object")
@@ -278,7 +454,7 @@ summary.spca <- function (object, ..., printres=TRUE) {
nfnegamax <- sum(eig < 0)
ms <- multispati(dudi=dudi, listw=lw, scannf=FALSE,
- nfposi=nfposimax, nfnega=nfnegamax)
+ nfposi=nfposimax, nfnega=nfnegamax)
ndim <- dudi$rank
nf <- nfposi + nfnega
@@ -301,86 +477,86 @@ summary.spca <- function (object, ..., printres=TRUE) {
#####################
-# Function plot.spca
+## Function plot.spca
#####################
plot.spca <- function (x, axis = 1, useLag=FALSE, ...){
- if (!inherits(x, "spca")) stop("Use only with 'spca' objects.")
-
- if(axis>ncol(x$li)) stop("wrong axis required.")
-
- opar <- par(no.readonly = TRUE)
- on.exit(par(opar))
- par(mar = rep(.1,4), mfrow=c(3,2))
-
- n <- nrow(x$li)
- xy <- x$xy
-
- ## handle useLag argument
- if(useLag){
- z <- x$ls[,axis]
- } else {
- z <- x$li[,axis]
- } # end if useLag
- nfposi <- x$nfposi
- nfnega <- x$nfnega
- ## handle neig parameter - hide cn if nore than 100 links
- nLinks <- sum(card(x$lw$neighbours))
- if(nLinks < 500) {
- neig <- nb2neig(x$lw$neighbours)
- } else {
- neig <- NULL
- }
+ if (!inherits(x, "spca")) stop("Use only with 'spca' objects.")
+
+ if(axis>ncol(x$li)) stop("wrong axis required.")
+
+ opar <- par(no.readonly = TRUE)
+ on.exit(par(opar))
+ par(mar = rep(.1,4), mfrow=c(3,2))
+
+ n <- nrow(x$li)
+ xy <- x$xy
+
+ ## handle useLag argument
+ if(useLag){
+ z <- x$ls[,axis]
+ } else {
+ z <- x$li[,axis]
+ } # end if useLag
+ nfposi <- x$nfposi
+ nfnega <- x$nfnega
+ ## handle neig parameter - hide cn if nore than 100 links
+ nLinks <- sum(card(x$lw$neighbours))
+ if(nLinks < 500) {
+ neig <- nb2neig(x$lw$neighbours)
+ } else {
+ neig <- NULL
+ }
- sub <- paste("Score",axis)
- csub <- 2
-
- # 1
- if(n<30) clab <- 1 else clab <- 0
- s.label(xy, clabel=clab, include.origin=FALSE, addaxes=FALSE, neig=neig,
- cneig=1, sub="Connection network", csub=2)
-
- # 2
- s.image(xy,z, include.origin=FALSE, grid=TRUE, kgrid=10, cgrid=1,
- sub=sub, csub=csub, possub="bottomleft")
- box()
-
- # 3
- if(n<30) {neig <- nb2neig(x$lw$neighbours)} else {neig <- NULL}
- s.value(xy,z, include.origin=FALSE, addaxes=FALSE, clegend=0, csize=.6,
- neig=neig, sub=sub, csub=csub, possub="bottomleft")
-
- # 4
- s.value(xy,z, include.origin=FALSE, addaxes=FALSE, clegend=0, csize=.6,
- method="greylevel", neig=neig, sub=sub, csub=csub, possub="bottomleft")
-
- # 5
- omar <- par("mar")
- par(mar = c(0.8, 2.8, 0.8, 0.8))
- m <- length(x$eig)
- col.w <- rep("white", m) # elles sont toutes blanches
- col.w[1:nfposi] <- "grey"
- if (nfnega>0) {col.w[m:(m-nfnega+1)] <- "grey"}
- j <- axis
- if (j>nfposi) {j <- j-nfposi +m -nfnega}
- col.w[j] <- "black"
- barplot(x$eig, col = col.w)
- scatterutil.sub(cha ="Eigenvalues", csub = 2.5, possub = "topright")
- par(mar=rep(.1,4))
- box()
- par(mar=omar)
-
- # 6
- par(mar=c(4,4,2,1))
- screeplot(x,main="Eigenvalues decomposition")
- par(mar=rep(.1,4))
- box()
- return(invisible(match.call()))
+ sub <- paste("Score",axis)
+ csub <- 2
+
+ # 1
+ if(n<30) clab <- 1 else clab <- 0
+ s.label(xy, clabel=clab, include.origin=FALSE, addaxes=FALSE, neig=neig,
+ cneig=1, sub="Connection network", csub=2)
+
+ # 2
+ s.image(xy,z, include.origin=FALSE, grid=TRUE, kgrid=10, cgrid=1,
+ sub=sub, csub=csub, possub="bottomleft")
+ box()
+
+ # 3
+ if(n<30) {neig <- nb2neig(x$lw$neighbours)} else {neig <- NULL}
+ s.value(xy,z, include.origin=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+ neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+ # 4
+ s.value(xy,z, include.origin=FALSE, addaxes=FALSE, clegend=0, csize=.6,
+ method="greylevel", neig=neig, sub=sub, csub=csub, possub="bottomleft")
+
+ # 5
+ omar <- par("mar")
+ par(mar = c(0.8, 2.8, 0.8, 0.8))
+ m <- length(x$eig)
+ col.w <- rep("white", m) # elles sont toutes blanches
+ col.w[1:nfposi] <- "grey"
+ if (nfnega>0) {col.w[m:(m-nfnega+1)] <- "grey"}
+ j <- axis
+ if (j>nfposi) {j <- j-nfposi +m -nfnega}
+ col.w[j] <- "black"
+ barplot(x$eig, col = col.w)
+ scatterutil.sub(cha ="Eigenvalues", csub = 2.5, possub = "topright")
+ par(mar=rep(.1,4))
+ box()
+ par(mar=omar)
+
+ # 6
+ par(mar=c(4,4,2,1))
+ screeplot(x,main="Eigenvalues decomposition")
+ par(mar=rep(.1,4))
+ box()
+ return(invisible(match.call()))
}
##########################
-# Function screeplot.spca
+## Function screeplot.spca
##########################
screeplot.spca <- function(x,...,main=NULL){
@@ -421,22 +597,22 @@ screeplot.spca <- function(x,...,main=NULL){
###################
-# colorplot method
+## colorplot method
###################
colorplot.spca <- function(x, axes=1:ncol(x$li), useLag=FALSE, ...){
- ## some checks
- if(!any(inherits(x,"spca"))) stop("x in not a spca object.")
+ ## some checks
+ if(!any(inherits(x,"spca"))) stop("x in not a spca object.")
- ## get args to be passed to colorplot
- xy <- x$xy
+ ## get args to be passed to colorplot
+ xy <- x$xy
- if(useLag) {
- X <- as.matrix(x$ls)
- } else {
- X <- as.matrix(x$li)
- }
+ if(useLag) {
+ X <- as.matrix(x$ls)
+ } else {
+ X <- as.matrix(x$li)
+ }
- ## call to colorplot
- colorplot(xy, X, axes, ...)
+ ## call to colorplot
+ colorplot(xy, X, axes, ...)
} # end colorplot.spca
diff --git a/R/spca_randtest.R b/R/spca_randtest.R
new file mode 100644
index 0000000..0a8eeb7
--- /dev/null
+++ b/R/spca_randtest.R
@@ -0,0 +1,92 @@
+#' Monte Carlo test for sPCA
+#'
+#' The function \code{spca_randtest} implements Monte-Carlo tests for the
+#' presence of significant spatial structures in a sPCA object. Two tests are
+#' run, for global (positive autocorrelation) and local (negative
+#' autocorrelation) structures, respectively. The test statistics used are the
+#' sum of the absolute values of the corresponding eigenvalues.
+#'
+#' @export
+#'
+#' @author Original code by Valeria Montano adapted by Thibaut Jombart.
+#'
+#' @param x A \code{\link{spca}} object.
+#'
+#' @param nperm The number of permutations to be used for the test.
+#'
+#' @return
+#'
+#' A list with two objects of the class 'randtest' (see
+#' \code{\link[ade4]{as.randtest}}), the first one for 'global' structures
+#' (positivie autocorrelation) and the second for 'local' structures (negative
+#' autocorrelation).
+#'
+#' @examples
+#'
+#' \dontrun{
+#' ## Load data
+#' data(sim2pop)
+#'
+#' ## Make spca
+#' spca1 <- spca(sim2pop, type = 1, scannf = FALSE, plot.nb = FALSE)
+#'
+#' spca1
+#' plot(spca1)
+#'
+#' ## run tests (use more permutations in practice, e.g. 999)
+#' tests <- spca_randtest(spca1, nperm = 49)
+#'
+#' ## check results
+#' tests
+#' plot(tests[[1]]) # global structures
+# plot(tests[[2]]) # local structures
+#'
+#' }
+#'
+spca_randtest <-function(x, nperm = 499){
+
+ if(!inherits(x, "spca")){
+ stop("x must be an spca object")
+ }
+
+
+ ## This function compute the test statistics for a given data object. Two test
+ ## statistics are computed, from the eigenvalues of the sPCA, called 'lambda':
+
+ ## sum(lambda >= 0)
+ ## sum(lambda < 0)
+
+ get_stats <- function(obj){
+ obj_pca <- ade4::dudi.pca(obj, center = FALSE, scale = FALSE,
+ scannf = FALSE)
+ obj_spca <- ade4::multispati(dudi = obj_pca,
+ listw = x$lw, scannf = FALSE,
+ nfposi = 1, nfnega = 1)
+ lambda <- obj_spca$eig
+ lambda_pos <- lambda[lambda >= 0]
+ lambda_neg <- lambda[lambda < 0]
+ stats <- c(pos = sum(lambda_pos),
+ neg = sum(abs(lambda_neg)))
+
+ return(stats)
+ }
+
+
+ ## This function permutes individuals (rows) in the dataset.
+
+ perm_data <- function(obj = x$tab){
+ obj[sample(1:nrow(obj)), , drop = FALSE]
+ }
+
+ sims <- vapply(seq_len(nperm),
+ function(i) get_stats(perm_data()),
+ double(2))
+
+ obs <- get_stats(x$tab)
+
+ pos_test <- as.randtest(sim = sims[1,], obs = obs[1], alter = "greater")
+ neg_test <- as.randtest(sim = sims[2,], obs = obs[2], alter = "greater")
+
+ list(global = pos_test, local = neg_test)
+}
+
diff --git a/R/xvalDapc.R b/R/xvalDapc.R
index 42150ac..0e0ee7e 100644
--- a/R/xvalDapc.R
+++ b/R/xvalDapc.R
@@ -1,4 +1,7 @@
+
+xvalDapc <- function (x, ...) UseMethod("xvalDapc")
+
##############
## xvalDapc ##
##############
@@ -48,14 +51,19 @@
new_grp <- x$GRP[-x$KEEP]
train_grp <- x$GRP[x$KEEP]
-
+ dapclist <- list(train_dat,
+ train_grp,
+ n.pca = n.pca,
+ n.da = n.da,
+ dudi = x$PCA)
+ temp.dapc <- suppressWarnings(do.call("dapc", dapclist))
temp.dapc <- suppressWarnings(dapc(train_dat, train_grp, dudi = x$PCA,
n.pca = n.pca, n.da = n.da))
temp.pred <- predict.dapc(temp.dapc, newdata = new_dat)
- if (result=="overall"){
+ if (identical(result, "overall")){
out <- mean(temp.pred$assign == new_grp)
}
- if (result=="groupMean"){
+ if (identical(result, "groupMean")){
out <- mean(tapply(temp.pred$assign == new_grp, new_grp, mean), na.rm = TRUE)
}
}
@@ -100,7 +108,7 @@
##############
## xvalDapc ##
##############
-xvalDapc <- function(x, grp, n.pca.max = 300, n.da = NULL, training.set = 0.9,
+xvalDapc.default <- function(x, grp, n.pca.max = 300, n.da = NULL, training.set = 0.9,
result = c("groupMean", "overall"), center = TRUE, scale = FALSE,
n.pca = NULL, n.rep = 30, xval.plot = TRUE, ...){
@@ -172,9 +180,10 @@ xvalDapc <- function(x, grp, n.pca.max = 300, n.da = NULL, training.set = 0.9,
## GET FULL PCA ##
if(missing(n.pca.max)) n.pca.max <- min(dim(x))
- pcaX <- dudi.pca(x, nf=n.pca.max, scannf=FALSE, center=center, scale=scale)
+
+ pcaX <- dudi.pca(x, nf=n.pca.max, scannf=FALSE, center=center, scale=scale)
n.pca.max <- min(n.pca.max, pcaX$rank, N.training-1) # re-defines n.pca.max (so user's input may not be the value used...)
-
+
## DETERMINE N.PCA IF NEEDED ##
if(n.pca.max < 10){
runs <- n.pca.max
@@ -252,6 +261,12 @@ xvalDapc <- function(x, grp, n.pca.max = 300, n.da = NULL, training.set = 0.9,
} # end xvalDapc.data.frame
-xvalDapc.data.frame <- xvalDapc
+xvalDapc.data.frame <- xvalDapc.default
xvalDapc.matrix <- xvalDapc.data.frame
+xvalDapc.genlight <- function(x, ...){
+ xvalDapc.matrix(as.matrix(x), ...)
+}
+xvalDapc.genind <- function(x, ...){
+ xvalDapc.matrix(tab(x), ...)
+}
diff --git a/R/zzz.R b/R/zzz.R
index 7f07db2..c2d0f3e 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,10 +1,17 @@
.onAttach <- function(libname, pkgname){
- pkg.version <- packageDescription("adegenet", fields = "Version")
+ # adegenet specific options -----------------------------------------------
+ op <- options()
+ op.adegenet <- list(
+ adegenet.testcon = stdin() # for readLines, read from stdin. This allows it to be changed for tests.
+ )
+ toset <- !(names(op.adegenet) %in% names(op))
+ if(any(toset)) options(op.adegenet[toset])
+ # startup message ---------------------------------------------------------
+ pkg.version <- packageDescription("adegenet", fields = "Version")
startup.txt <- paste("\n /// adegenet ", pkg.version, " is loaded ////////////",
"\n\n > overview: '?adegenet'",
"\n > tutorials/doc/questions: 'adegenetWeb()' ",
"\n > bug reports/feature requests: adegenetIssues()\n\n", sep="")
-
packageStartupMessage(startup.txt)
}
diff --git a/README.md b/README.md
index c30fce0..9ceebe0 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,11 @@
[![Travis-CI Build Status](https://travis-ci.org/thibautjombart/adegenet.png?branch=master)](https://travis-ci.org/thibautjombart/adegenet)
+[![Build status](https://ci.appveyor.com/api/projects/status/l7a9k1saqnshakk9/branch/master?svg=true)](https://ci.appveyor.com/project/thibautjombart/adegenet/branch/master)
+
+[![CRAN Status Badge](http://www.r-pkg.org/badges/version/adegenet)](https://cran.r-project.org/package=adegenet)
+
+[![CRAN Downloads](https://cranlogs.r-pkg.org/badges/adegenet)](https://cran.r-project.org/package=adegenet)
+
adegenet
========
*adegenet*: a R Package for the Multivariate Analysis of Genetic Markers
diff --git a/data/H3N2.rda b/data/H3N2.rda
index a41f656..d407c7a 100644
Binary files a/data/H3N2.rda and b/data/H3N2.rda differ
diff --git a/data/dapcIllus.rda b/data/dapcIllus.rda
index f11e84a..45b61d8 100644
Binary files a/data/dapcIllus.rda and b/data/dapcIllus.rda differ
diff --git a/data/datalist b/data/datalist
index fcf4fb0..2e8f724 100644
--- a/data/datalist
+++ b/data/datalist
@@ -1,6 +1,7 @@
H3N2
dapcIllus
eHGDP
+hybridtoy
microbov
nancycats
rupica
diff --git a/data/eHGDP.rda b/data/eHGDP.rda
index 239e172..8ec7bbf 100644
Binary files a/data/eHGDP.rda and b/data/eHGDP.rda differ
diff --git a/data/hybridtoy.RData b/data/hybridtoy.RData
new file mode 100644
index 0000000..f8b02df
Binary files /dev/null and b/data/hybridtoy.RData differ
diff --git a/data/microbov.rda b/data/microbov.rda
index d890314..9f1625d 100644
Binary files a/data/microbov.rda and b/data/microbov.rda differ
diff --git a/data/nancycats.rda b/data/nancycats.rda
index 330e044..b13f835 100644
Binary files a/data/nancycats.rda and b/data/nancycats.rda differ
diff --git a/data/rupica.RData b/data/rupica.RData
new file mode 100644
index 0000000..af56325
Binary files /dev/null and b/data/rupica.RData differ
diff --git a/data/rupica.rda b/data/rupica.rda
deleted file mode 100644
index a93c6c9..0000000
Binary files a/data/rupica.rda and /dev/null differ
diff --git a/data/sim2pop.rda b/data/sim2pop.rda
index cf6afa2..400f9b3 100644
Binary files a/data/sim2pop.rda and b/data/sim2pop.rda differ
diff --git a/data/spcaIllus.rda b/data/spcaIllus.rda
index 8b10b1d..336b731 100644
Binary files a/data/spcaIllus.rda and b/data/spcaIllus.rda differ
diff --git a/man/AIC.snapclust.Rd b/man/AIC.snapclust.Rd
new file mode 100644
index 0000000..058319a
--- /dev/null
+++ b/man/AIC.snapclust.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AIC.snapclust.R
+\name{AIC.snapclust}
+\alias{AIC.snapclust}
+\title{Compute Akaike Information Criterion (AIC) for snapclust}
+\usage{
+\method{AIC}{snapclust}(object, ...)
+}
+\arguments{
+\item{object}{An object returned by the function \code{\link{snapclust}}.}
+
+\item{...}{Further arguments for compatibility with the \code{AIC} generic
+(currently not used).}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\seealso{
+\code{\link{snapclust}} to generate clustering solutions.
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/AICc.Rd b/man/AICc.Rd
new file mode 100644
index 0000000..4ada56f
--- /dev/null
+++ b/man/AICc.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AICc.snapclust.R
+\name{AICc}
+\alias{AICc}
+\alias{AICc.snapclust}
+\title{Compute Akaike Information Criterion for small samples (AICc) for snapclust}
+\usage{
+AICc(object, ...)
+
+\method{AICc}{snapclust}(object, ...)
+}
+\arguments{
+\item{object}{An object returned by the function \code{\link{snapclust}}.}
+
+\item{...}{Further arguments for compatibility with the \code{AIC} generic
+(currently not used).}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\seealso{
+\code{\link{snapclust}} to generate clustering solutions.
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/BIC.snapclust.Rd b/man/BIC.snapclust.Rd
new file mode 100644
index 0000000..c9b5f5e
--- /dev/null
+++ b/man/BIC.snapclust.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/BIC.snapclust.R
+\name{BIC.snapclust}
+\alias{BIC.snapclust}
+\title{Compute Bayesian Information Criterion (BIC) for snapclust}
+\usage{
+\method{BIC}{snapclust}(object, ...)
+}
+\arguments{
+\item{object}{An object returned by the function \code{\link{snapclust}}.}
+
+\item{...}{Further arguments for compatibility with the \code{BIC} generic
+(currently not used).}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\seealso{
+\code{\link{snapclust}} to generate clustering solutions.
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/H3N2.Rd b/man/H3N2.Rd
index b2fdcd7..ea87c63 100644
--- a/man/H3N2.Rd
+++ b/man/H3N2.Rd
@@ -3,10 +3,10 @@
\docType{data}
\name{H3N2}
\alias{H3N2}
-\alias{USflu}
-\alias{USflu.fasta}
\alias{usflu}
\alias{usflu.fasta}
+\alias{USflu}
+\alias{USflu.fasta}
\title{Seasonal influenza (H3N2) HA segment data}
\format{\code{H3N2} is a genind object with several data frame as
supplementary components (\code{H3N2 at other) slort}, which contains the
@@ -85,4 +85,3 @@ of principal components: a new method for the analysis of genetically
structured populations. Submitted to \emph{BMC genetics}.
}
\keyword{datasets}
-
diff --git a/man/Hs.Rd b/man/Hs.Rd
index 108068d..e58f90c 100644
--- a/man/Hs.Rd
+++ b/man/Hs.Rd
@@ -36,10 +36,9 @@ Hs(genind2genpop(nancycats))
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{Hs.test}} to test differences in Hs between two groups
}
-
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
diff --git a/man/Hs.test.Rd b/man/Hs.test.Rd
index f21d8f8..0cef673 100644
--- a/man/Hs.test.Rd
+++ b/man/Hs.test.Rd
@@ -42,11 +42,10 @@ plot(test)
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{Hs}} to compute Hs for different populations;
\code{\link[ade4]{as.randtest}} for the class of Monte Carlo tests.
}
-
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
diff --git a/man/KIC.Rd b/man/KIC.Rd
new file mode 100644
index 0000000..1c36430
--- /dev/null
+++ b/man/KIC.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/KIC.snapclust.R
+\name{KIC}
+\alias{KIC}
+\alias{KIC.snapclust}
+\title{Compute Akaike Information Criterion for small samples (AICc) for snapclust}
+\usage{
+KIC(object, ...)
+
+\method{KIC}{snapclust}(object, ...)
+}
+\arguments{
+\item{object}{An object returned by the function \code{\link{snapclust}}.}
+
+\item{...}{Further arguments for compatibility with the \code{AIC} generic
+(currently not used).}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\seealso{
+\code{\link{snapclust}} to generate clustering solutions.
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/adegenet.package.Rd b/man/adegenet.package.Rd
index 63bc2bb..68488f3 100644
--- a/man/adegenet.package.Rd
+++ b/man/adegenet.package.Rd
@@ -3,8 +3,8 @@
\docType{package}
\encoding{utf-8}
\name{adegenet.package}
-\alias{adegenet}
\alias{adegenet.package}
+\alias{adegenet}
\alias{adegenet.package-package}
\title{The adegenet package}
\description{
@@ -99,34 +99,34 @@ from a population whose sample size is above a given level.\cr -
\code{\link{pop}} sets the population of a set of genotypes.\cr
=== ANALYZING DATA ===\cr Several functions allow to use usual, and less
-usual analyses:\cr - \code{\link{HWE.test.genind}}: performs HWE test for
-all populations and loci combinations \cr - \code{\link{dist.genpop}}: computes 5 genetic distances among populations.
-\cr - \code{\link{monmonier}}: implementation of the Monmonier algorithm,
-used to seek genetic boundaries among individuals or populations. Optimized
-boundaries can be obtained using \code{\link{optimize.monmonier}}. Object of
-the class \code{monmonier} can be plotted and printed using the
-corresponding methods. \cr - \code{\link{spca}}: implements Jombart et al.
-(2008) spatial Principal Component Analysis \cr -
-\code{\link{global.rtest}}: implements Jombart et al. (2008) test for global
-spatial structures \cr - \code{\link{local.rtest}}: implements Jombart et
-al. (2008) test for local spatial structures \cr - \code{\link{propShared}}:
-computes the proportion of shared alleles in a set of genotypes (i.e. from a
-genind object)\cr - \code{\link{propTyped}}: function to investigate missing
-data in several ways \cr - \code{\link{scaleGen}}: generic method to scale
-\linkS4class{genind} or \linkS4class{genpop} before a principal component
-analysis \cr - \code{\link{Hs}}: computes the average expected
-heterozygosity by population in a \linkS4class{genpop}. Classically Used as
-a measure of genetic diversity.\cr - \code{\link{find.clusters}} and
-\code{\link{dapc}}: implement the Discriminant Analysis of Principal
-Component (DAPC, Jombart et al., 2010).\cr - \code{\link{seqTrack}}:
-implements the SeqTrack algorithm for recontructing transmission trees of
-pathogens (Jombart et al., 2010) .\cr \code{\link{glPca}}: implements PCA
-for \linkS4class{genlight} objects.\cr - \code{\link{gengraph}}: implements
-some simple graph-based clustering using genetic data. -
-\code{\link{snpposi.plot}} and \code{\link{snpposi.test}}: visualize the
-distribution of SNPs on a genetic sequence and test their randomness. -
-\code{\link{adegenetServer}}: opens up a web interface for some
-functionalities of the package (DAPC with cross validation and feature
+usual analyses:\cr - \code{\link{HWE.test.genind}}: performs HWE test for all
+populations and loci combinations \cr - \code{\link{dist.genpop}}: computes 5
+genetic distances among populations. \cr - \code{\link{monmonier}}:
+implementation of the Monmonier algorithm, used to seek genetic boundaries
+among individuals or populations. Optimized boundaries can be obtained using
+\code{\link{optimize.monmonier}}. Object of the class \code{monmonier} can be
+plotted and printed using the corresponding methods. \cr -
+\code{\link{spca}}: implements Jombart et al. (2008) spatial Principal
+Component Analysis \cr - \code{\link{global.rtest}}: implements Jombart et
+al. (2008) test for global spatial structures \cr -
+\code{\link{local.rtest}}: implements Jombart et al. (2008) test for local
+spatial structures \cr - \code{\link{propShared}}: computes the proportion of
+shared alleles in a set of genotypes (i.e. from a genind object)\cr -
+\code{\link{propTyped}}: function to investigate missing data in several ways
+\cr - \code{\link{scaleGen}}: generic method to scale \linkS4class{genind} or
+\linkS4class{genpop} before a principal component analysis \cr -
+\code{\link{Hs}}: computes the average expected heterozygosity by population
+in a \linkS4class{genpop}. Classically Used as a measure of genetic
+diversity.\cr - \code{\link{find.clusters}} and \code{\link{dapc}}: implement
+the Discriminant Analysis of Principal Component (DAPC, Jombart et al.,
+2010).\cr - \code{\link{seqTrack}}: implements the SeqTrack algorithm for
+recontructing transmission trees of pathogens (Jombart et al., 2010) .\cr
+\code{\link{glPca}}: implements PCA for \linkS4class{genlight} objects.\cr -
+\code{\link{gengraph}}: implements some simple graph-based clustering using
+genetic data. - \code{\link{snpposi.plot}} and \code{\link{snpposi.test}}:
+visualize the distribution of SNPs on a genetic sequence and test their
+randomness. - \code{\link{adegenetServer}}: opens up a web interface for
+some functionalities of the package (DAPC with cross validation and feature
selection).\cr
=== GRAPHICS ===\cr - \code{\link{colorplot}}: plots points with associated
@@ -162,16 +162,6 @@ Tutorials are available via the command \code{adegenetTutorial}.\cr
To cite adegenet, please use the reference given by
\code{citation("adegenet")} (or see references below).
}
-\author{
-Thibaut Jombart <t.jombart at imperial.ac.uk>\cr
-Developers: Zhian N. Kamvar <zkamvar at gmail.com>,
-Caitlin Collins <caitiecollins17 at gmail.com>,
-Ismail Ahmed <ismail.ahmed at inserm.fr>,
-Federico Calboli, Tobias Erik Reiners, Peter
-Solymos, Anne Cori, \cr Contributed datasets from: Katayoun
-Moazami-Goudarzi, Denis Laloë, Dominique Pontier, Daniel Maillard, Francois
-Balloux.
-}
\references{
Jombart T. (2008) adegenet: a R package for the multivariate
analysis of genetic markers \emph{Bioinformatics} 24: 1403-1405. doi:
@@ -205,6 +195,15 @@ genetics tools\cr - \code{ape} for phylogenetics and DNA data handling\cr -
\code{seqinr} for handling nucleic and proteic sequences\cr - \code{shiny}
for R-based web interfaces\cr
}
+\author{
+Thibaut Jombart <t.jombart at imperial.ac.uk>\cr
+Developers: Zhian N. Kamvar <zkamvar at gmail.com>,
+Caitlin Collins <caitiecollins17 at gmail.com>,
+Ismail Ahmed <ismail.ahmed at inserm.fr>,
+Federico Calboli, Tobias Erik Reiners, Peter
+Solymos, Anne Cori, \cr Contributed datasets from: Katayoun
+Moazami-Goudarzi, Denis Laloë, Dominique Pontier, Daniel Maillard, Francois
+Balloux.
+}
\keyword{manip}
\keyword{multivariate}
-
diff --git a/man/auxil.Rd b/man/auxil.Rd
index 2d07dd1..6f043fd 100644
--- a/man/auxil.Rd
+++ b/man/auxil.Rd
@@ -22,6 +22,8 @@
\alias{spectral}
\alias{wasp}
\alias{funky}
+\alias{virid}
+\alias{hybridpal}
\title{ Auxiliary functions for adegenet}
\description{
@@ -64,10 +66,19 @@
\item \code{spectral}: red -> yellow -> blue (RColorBrewer variant)
\item \code{wasp}: gold -> brown -> black
\item \code{funky}: many colors
+ \item \code{virid}: adaptation of the \code{viridis} palette, from
+ the \code{viridis} package.
+ \item \code{hybridpal}: reorder a color palette (\code{virid} by
+ default) to display sharp contrast between the first two colors, and
+ interpolated colors after; ideal for datasets where two parental
+ populations are provided first, followed by various degrees of
+ hybrids.
+
}
}
\seealso{
- The R package RColorBrewer, proposing a nice selection of color palettes.
+ The R package RColorBrewer, proposing a nice selection of color
+ palettes. The \code{viridis} package, with many excellent palettes.
}
\usage{
.genlab(base, n)
@@ -78,6 +89,7 @@ num2col(x, col.pal=heat.colors, reverse=FALSE,
fac2col(x, col.pal=funky, na.col="transparent", seed=NULL)
any2col(x, col.pal=seasun, na.col="transparent")
transp(col, alpha=.5)
+hybridpal(col.pal = virid)
}
\arguments{
\item{base}{a character string forming the base of the labels}
@@ -127,6 +139,7 @@ plot(1:100, col=num2col(1:100, col.pal=flame), pch=20, cex=4)
plot(1:100, col=num2col(1:100, col.pal=wasp), pch=20, cex=4)
plot(1:100, col=num2col(1:100, col.pal=azur,rev=TRUE), pch=20, cex=4)
plot(1:100, col=num2col(1:100, col.pal=spectral), pch=20, cex=4)
+plot(1:100, col=num2col(1:100, col.pal=virid), pch=20, cex=4)
## factor as colors using fac2col
dat <- cbind(c(rnorm(50,8), rnorm(100), rnorm(150,3),
@@ -148,4 +161,4 @@ barplot(x, col=col.info$col, main="Use of any2col on a numeric")
legend("bottomleft", fill=col.info$leg.col, legend=col.info$leg.txt, bg="white")
}
-\keyword{manip}
\ No newline at end of file
+\keyword{manip}
diff --git a/man/chooseCN.Rd b/man/chooseCN.Rd
index 481ea14..cdaace4 100644
--- a/man/chooseCN.Rd
+++ b/man/chooseCN.Rd
@@ -6,7 +6,7 @@
\usage{
chooseCN(xy, ask = TRUE, type = NULL, result.type = "nb", d1 = NULL,
d2 = NULL, k = NULL, a = NULL, dmin = NULL, plot.nb = TRUE,
- edit.nb = FALSE)
+ edit.nb = FALSE, check.duplicates = TRUE)
}
\arguments{
\item{xy}{an matrix or data.frame with two columns for x and y coordinates.}
@@ -41,6 +41,8 @@ plotted (TRUE, default) or not (FALSE).}
\item{edit.nb}{a logical stating whether the resulting graph should be
edited manually for corrections (TRUE) or not (FALSE, default).}
+
+\item{check.duplicates}{a logical indicating if duplicate coordinates should be detected; this can be an issue for some graphs; TRUE by default.}
}
\value{
Returns a connection network having the class \code{nb} or
@@ -85,12 +87,11 @@ par(mfrow=c(1,1))
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{spca}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{spatial}
\keyword{utilities}
-
diff --git a/man/compoplot.Rd b/man/compoplot.Rd
new file mode 100644
index 0000000..015e107
--- /dev/null
+++ b/man/compoplot.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compoplot.R
+\name{compoplot}
+\alias{compoplot}
+\alias{compoplot.matrix}
+\alias{compoplot.dapc}
+\alias{compoplot.snapclust}
+\title{Genotype composition plot}
+\usage{
+compoplot(x, ...)
+
+\method{compoplot}{matrix}(x, col.pal = funky, border = NA, subset = NULL,
+ show.lab = FALSE, lab = rownames(x), legend = TRUE,
+ txt.leg = colnames(x), n.col = 4, posi = NULL, cleg = 0.8,
+ bg = transp("white"), ...)
+
+\method{compoplot}{dapc}(x, only.grp = NULL, border = NA, ...)
+
+\method{compoplot}{snapclust}(x, border = NA, ...)
+}
+\arguments{
+\item{x}{an object to be used for plotting (see description)}
+
+\item{...}{further arguments to be passed to \code{barplot}}
+
+\item{col.pal}{a color palette to be used for the groups; defaults to \code{funky}}
+
+\item{border}{a color for the border of the barplot; use \code{NA} to
+indicate no border.}
+
+\item{subset}{a subset of individuals to retain}
+
+\item{show.lab}{a logical indicating if individual labels should be displayed}
+
+\item{lab}{a vector of individual labels; if NULL, row.names of the matrix are used}
+
+\item{legend}{a logical indicating whether a legend should be provided for the colors}
+
+\item{txt.leg}{a character vector to be used for the legend}
+
+\item{n.col}{the number of columns to be used for the legend}
+
+\item{posi}{the position of the legend}
+
+\item{cleg}{a size factor for the legend}
+
+\item{bg}{the background to be used for the legend}
+
+\item{only.grp}{a subset of groups to retain}
+}
+\description{
+The compoplot uses a barplot to represent the group assignment probability of
+individuals to several groups. It is a generic with methods for the following
+objects:
+}
+\details{
+\itemize{
+
+\item \code{matrix}: a matrix with individuals in row and genetic clusters in
+column, each entry being an assignment probability of the corresponding
+individual to the corresponding group
+
+\item \code{dapc}: the output of the \code{dapc} function; in this case,
+group assignments are based upon geometric criteria in the discriminant space
+
+\item \code{snapclust}: the output of the \code{snapclust} function; in
+this case, group assignments are based upon the likelihood of genotypes
+belonging to their groups
+
+}
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/dapcGraphics.Rd b/man/dapcGraphics.Rd
index 72bdb0f..77c42c3 100644
--- a/man/dapcGraphics.Rd
+++ b/man/dapcGraphics.Rd
@@ -2,7 +2,6 @@
\name{dapc graphics}
\alias{scatter.dapc}
\alias{assignplot}
-\alias{compoplot}
\title{Graphics for Discriminant Analysis of Principal Components (DAPC)}
\description{
These functions provide graphic outputs for Discriminant Analysis of
@@ -16,29 +15,24 @@
'discriminant functions'), with a screeplot of eigenvalues as inset.\cr
- \code{assignplot}: plot showing the probabilities of assignment of
individuals to the different clusters.\cr
- - \code{compoplot}: barplot showing the probabilities of assignment of
- individuals to the different clusters.\cr
}
\usage{
\method{scatter}{dapc}(x, xax=1, yax=2, grp=x$grp, col=seasun(length(levels(grp))),
- pch=20, bg="white", solid=.7, scree.da=TRUE,
- scree.pca=FALSE, posi.da="bottomright",
- posi.pca="bottomleft", bg.inset="white", ratio.da=.25,
- ratio.pca=.25, inset.da=0.02, inset.pca=0.02,
- inset.solid=.5, onedim.filled=TRUE, mstree=FALSE, lwd=1,
- lty=1, segcol="black", legend=FALSE, posi.leg="topright",
- cleg=1, txt.leg=levels(grp), cstar = 1, cellipse = 1.5,
- axesell = FALSE, label = levels(grp), clabel = 1, xlim =
- NULL, ylim = NULL, grid = FALSE, addaxes = TRUE, origin =
- c(0,0), include.origin = TRUE, sub = "", csub = 1, possub =
- "bottomleft", cgrid = 1, pixmap = NULL, contour = NULL, area
- = NULL, label.inds = NULL, \ldots)
+ pch=20, bg="white", solid=.7, scree.da=TRUE,
+ scree.pca=FALSE, posi.da="bottomright",
+ posi.pca="bottomleft", bg.inset="white", ratio.da=.25,
+ ratio.pca=.25, inset.da=0.02, inset.pca=0.02,
+ inset.solid=.5, onedim.filled=TRUE, mstree=FALSE, lwd=1,
+ lty=1, segcol="black", legend=FALSE, posi.leg="topright",
+ cleg=1, txt.leg=levels(grp), cstar = 1, cellipse = 1.5,
+ axesell = FALSE, label = levels(grp), clabel = 1, xlim =
+ NULL, ylim = NULL, grid = FALSE, addaxes = TRUE, origin =
+ c(0,0), include.origin = TRUE, sub = "", csub = 1, possub =
+ "bottomleft", cgrid = 1, pixmap = NULL, contour = NULL, area
+ = NULL, label.inds = NULL, \ldots)
assignplot(x, only.grp=NULL, subset=NULL, new.pred=NULL, cex.lab=.75,pch=3)
-
-compoplot(x, only.grp=NULL, subset=NULL, new.pred=NULL, col=NULL, lab=NULL,
- legend=TRUE, txt.leg=NULL, ncol=4, posi=NULL, cleg=.8, bg=transp("white"), ...)
}
\arguments{
\item{x}{a \code{dapc} object.}
@@ -103,16 +97,9 @@ compoplot(x, only.grp=NULL, subset=NULL, new.pred=NULL, col=NULL, lab=NULL,
at the bottom of the plot. To visualize these individuals only, specify
\code{only.grp="unknown"}.}
\item{cex.lab}{a \code{numeric} indicating the size of labels.}
- \item{lab}{a vector of characters (recycled if necessary) of labels
- for the individuals; if left to NULL, the row names of \code{x$tab}
- are used.}
\item{txt.leg}{a character vector indicating the text to be used in
the legend; if not provided, group names stored in \code{x$grp} are
used.}
- \item{ncol}{an integer indicating the number of columns of the legend,
- defaulting to 4.}
- \item{posi}{a characther string indicating the position of the legend; can match
- any combination of "top/bottom" and "left/right". See \code{?legend}.}
\item{label.inds}{Named list of arguments passed to the
\code{\link[vegan]{orditorp}} function. This will label individual points
witout overlapping. Arguments \code{x} and \code{display} are hardcoded and
diff --git a/man/dapcIllus.Rd b/man/dapcIllus.Rd
index 53ebeae..bde7690 100644
--- a/man/dapcIllus.Rd
+++ b/man/dapcIllus.Rd
@@ -69,9 +69,6 @@ detach(dapcIllus)
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Jombart, T., Devillard, S. and Balloux, F. Discriminant
analysis of principal components: a new method for the analysis of
@@ -87,5 +84,7 @@ genetically structured populations. Submitted to \emph{Genetics}.
- \code{\link{find.clusters}}: to identify clusters without prior.
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{datasets}
-
diff --git a/man/df2genind.Rd b/man/df2genind.Rd
index 7374acb..483e8bf 100644
--- a/man/df2genind.Rd
+++ b/man/df2genind.Rd
@@ -18,15 +18,17 @@ decription)}
coding one genotype at one locus. If not provided, this is determined from
data.}
-\item{ind.names}{optinal, a vector giving the individuals names; if NULL, taken
-from rownames of X. If factor or numeric, vector is converted to character.}
+\item{ind.names}{optinal, a vector giving the individuals names; if NULL,
+taken from rownames of X. If factor or numeric, vector is converted to
+character.}
\item{loc.names}{an optional character vector giving the markers names; if
NULL, taken from colnames of X.}
\item{pop}{an optional factor giving the population of each individual.}
-\item{NA.char}{a character string corresponding to missing allele (to be treated as NA)}
+\item{NA.char}{a character string corresponding to missing allele (to be
+treated as NA)}
\item{ploidy}{an integer indicating the degree of ploidy of the genotypes.}
@@ -43,25 +45,28 @@ levels in your strata. see \code{\link{hierarchy}} for details.}
}
\value{
an object of the class \linkS4class{genind} for \code{df2genind}; a
-matrix of biallelic genotypes for \code{genind2df}
+ matrix of biallelic genotypes for \code{genind2df}
}
\description{
The function \code{df2genind} converts a data.frame (or a matrix) into a
\linkS4class{genind} object. The data.frame must meet the following
-requirements:\cr
-- genotypes are in row (one row per genotype)\cr
-- markers/loci are in columns\cr
-- each element is a string of characters coding alleles, ideally separated by a character string (argument \code{sep});
-if no separator is used, the number of characters coding alleles must be indicated (argument \code{ncode}).\cr
+requirements:
+\itemize{
+\item genotypes are in row (one row per genotype)
+\item markers/loci are in columns
+\item each element is a string of characters coding alleles, ideally
+separated by a character string (argument \code{sep}); if no separator is
+used, the number of characters coding alleles must be indicated (argument
+\code{ncode}).}
}
\details{
-See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to such a
-data.frame.
+See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to
+such a data.frame.
=== Details for the \code{sep} argument ===\cr this character is directly
-used in reguar expressions like \code{gsub}, and thus require some
-characters to be preceeded by double backslashes. For instance, "/" works
-but "|" must be coded as "\\|".
+used in reguar expressions like \code{gsub}, and thus require some characters
+to be preceeded by double backslashes. For instance, "/" works but "|" must
+be coded as "\\|".
}
\examples{
@@ -81,14 +86,13 @@ genind2df(obj)
genind2df(obj, sep="/")
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}, Zhian N. Kamvar
- \email{kamvarz at science.oregonstate.edu}
-}
\seealso{
\code{\link{genind2df}}, \code{\link{import2genind}},
\code{\link{read.genetix}}, \code{\link{read.fstat}},
\code{\link{read.structure}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}, Zhian N. Kamvar
+ \email{kamvarz at science.oregonstate.edu}
+}
\keyword{manip}
-
diff --git a/man/doc_C_routines.Rd b/man/doc_C_routines.Rd
new file mode 100644
index 0000000..758b3e4
--- /dev/null
+++ b/man/doc_C_routines.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/doc_C_routines.R
+\docType{data}
+\name{.internal_C_routines}
+\alias{.internal_C_routines}
+\alias{CheckAllSeg}
+\alias{GLdotProd}
+\alias{GLsumFreq}
+\alias{GLsumInt}
+\alias{binIntToBytes}
+\alias{bytesToBinInt}
+\alias{bytesToInt}
+\alias{nb_shared_all}
+\title{Internal C routines}
+\format{An object of class \code{NULL} of length 0.}
+\usage{
+.internal_C_routines
+}
+\description{
+These functions are internal C routines used in adegenet. Do not use them
+unless you know what you are doing.
+}
+\author{
+Thibaut Jombart
+}
+\keyword{datasets}
diff --git a/man/eHGDP.Rd b/man/eHGDP.Rd
index a8a5111..f4e3c55 100644
--- a/man/eHGDP.Rd
+++ b/man/eHGDP.Rd
@@ -145,4 +145,3 @@ principal components: a new method for the analysis of genetically
structured populations. Submitted to \emph{BMC genetics}.
}
\keyword{datasets}
-
diff --git a/man/export_to_mvmapper.Rd b/man/export_to_mvmapper.Rd
new file mode 100644
index 0000000..7ed292d
--- /dev/null
+++ b/man/export_to_mvmapper.Rd
@@ -0,0 +1,122 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/export_to_mvmapper.R
+\name{export_to_mvmapper}
+\alias{export_to_mvmapper}
+\alias{export_to_mvmapper.default}
+\alias{export_to_mvmapper.dapc}
+\alias{export_to_mvmapper.dudi}
+\alias{export_to_mvmapper.spca}
+\title{Export analysis for mvmapper visualisation}
+\usage{
+export_to_mvmapper(x, ...)
+
+\method{export_to_mvmapper}{default}(x, ...)
+
+\method{export_to_mvmapper}{dapc}(x, info, write_file = TRUE,
+ out_file = NULL, ...)
+
+\method{export_to_mvmapper}{dudi}(x, info, write_file = TRUE,
+ out_file = NULL, ...)
+
+\method{export_to_mvmapper}{spca}(x, info, write_file = TRUE,
+ out_file = NULL, ...)
+}
+\arguments{
+\item{x}{The analysis to be exported. Can be a \code{dapc}, \code{spca}, or a
+\code{dudi} object.}
+
+\item{...}{Further arguments to pass to other methods.}
+
+\item{info}{A \code{data.frame} with additional information containing at
+least the following columns: \code{key} (unique individual identifier),
+\code{lat} (latitude), and \code{lon} (longitude). Other columns will be
+exported as well, but are optional.}
+
+\item{write_file}{A \code{logical} indicating if the output should be written
+out to a .csv file. Defaults to \code{TRUE}.}
+
+\item{out_file}{A character string indicating the file to which the output
+should be written. If NULL, the file used will be named
+\code{'mvmapper_data_[date and time].csv'}}
+}
+\value{
+A \code{data.frame} which can serve as input to \code{mvmapper},
+containing at least the following columns:
+
+\itemize{
+
+\item \code{key}: unique individual identifiers
+
+\item \code{PC1}: first principal component; further principal components are
+optional, but if provided will be numbered and follow \code{PC1}.
+
+\item \code{lat}: latitude for each individual
+
+\item \code{lon}: longitude for each individual
+
+}
+
+In addition, specific information is added for some analyses:
+
+\itemize{
+
+\item \code{spca}: \code{Lag_PC} columns contain the lag-vectors of the
+principal components; the lag operator computes, for each individual, the
+average score of neighbouring individuals; it is useful for clarifying
+patches and clines.
+
+\item \code{dapc}: \code{grp} is the group used in the analysis;
+\code{assigned_grp} is the group assignment based on the discriminant
+functions; \code{support} is the statistical support (i.e. assignment
+probability) for \code{assigned_grp}.
+
+}
+}
+\description{
+\code{mvmapper} is an interactive tool for visualising outputs of a
+multivariate analysis on a map from a web browser. The function
+\code{export_to_mvmapper} is a generic with methods for several standard
+classes of analyses in \code{adegenet} and \code{ade4}. Information on
+individual locations, as well as any other relevant data, is passed through
+the second argument \code{info}. By default, the function returns a formatted
+\code{data.frame} and writes the output to a .csv file.\cr
+}
+\details{
+\code{mvmapper} can be found at:
+\url{https://popphylotools.github.io/mvMapper/}
+}
+\examples{
+
+data(sim2pop)
+
+dapc1 <- dapc(sim2pop, n.pca = 10, n.da = 1)
+
+info <- data.frame(key = indNames(sim2pop),
+ lat = other(sim2pop)$xy[,2],
+ lon = other(sim2pop)$xy[,1],
+ Population = pop(sim2pop))
+
+out <- export_to_mvmapper(dapc1, info, write_file = FALSE)
+head(out)
+
+data(rupica)
+
+spca1 <- spca(rupica, type=5, d1 = 0, d2 = 2300,
+ plot = FALSE, scannf = FALSE,
+ nfposi = 2,nfnega = 0)
+
+info <- data.frame(key = indNames(rupica),
+ lat = rupica$other$xy[,2],
+ lon = rupica$other$xy[,1])
+
+out <- export_to_mvmapper(spca1, info, write_file = FALSE)
+head(out)
+
+}
+\seealso{
+\code{mvmapper} is available at:
+\url{https://popphylotools.github.io/mvMapper/}
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/fasta2DNAbin.Rd b/man/fasta2DNAbin.Rd
index ecd0330..9670c71 100644
--- a/man/fasta2DNAbin.Rd
+++ b/man/fasta2DNAbin.Rd
@@ -23,7 +23,11 @@ fasta2DNAbin(file, quiet=FALSE, chunkSize=10, snpOnly=FALSE)
}
\arguments{
\item{file}{ a character string giving the path to the file to
- convert, with the extension ".fa", ".fas", or ".fasta".}
+ convert, with the extension ".fa", ".fas", or ".fasta".
+
+ Can also be a \link{connection} (which will be opened for reading if
+ necessary, and if so \code{\link{close}}d (and hence destroyed) at the
+ end of the function call).}
\item{quiet}{a logical stating whether a conversion messages should be
printed (FALSE, default) or not (TRUE).}
\item{chunkSize}{an integer indicating the number of genomes to be
diff --git a/man/find.clusters.Rd b/man/find.clusters.Rd
index 5b2f7dc..83e2aa2 100644
--- a/man/find.clusters.Rd
+++ b/man/find.clusters.Rd
@@ -7,130 +7,161 @@
\alias{find.clusters.genlight}
\alias{.find.sub.clusters}
\title{find.cluster: cluster identification using successive K-means}
+
\description{
- These functions implement the clustering procedure used in
- Discriminant Analysis of Principal Components (DAPC, Jombart et
- al. 2010). This procedure consists in running successive K-means with
- an increasing number of clusters (\code{k}), after transforming data
- using a principal component analysis (PCA). For each model, a
- statistical measure of goodness of fit (by default, BIC) is computed,
- which allows to choose the optimal \code{k}. See \code{details} for a
- description of how to select the optimal \code{k} and
- \code{vignette("adegenet-dapc")} for a tutorial.
-
- Optionally, hierarchical clustering can be sought by providing a prior
- clustering of individuals (argument \code{clust}). In such case, clusters will
- be sought within each prior group.
-
- The K-means procedure used in \code{find.clusters} is \code{\link[stats]{kmeans}} function
- from the \code{stats} package. The PCA function is \code{\link[ade4]{dudi.pca}} from the
- \code{ade4} package, except for \linkS4class{genlight} objects which
- use the \code{\link{glPca}} procedure from adegenet.
-
- \code{find.clusters} is a generic function with methods for the
+
+These functions implement the clustering procedure used in Discriminant Analysis
+of Principal Components (DAPC, Jombart et al. 2010). This procedure consists in
+running successive K-means with an increasing number of clusters (\code{k}),
+after transforming data using a principal component analysis (PCA). For each
+model, a statistical measure of goodness of fit (by default, BIC) is computed,
+which allows to choose the optimal \code{k}. See \code{details} for a
+description of how to select the optimal \code{k} and
+\code{vignette("adegenet-dapc")} for a tutorial.
+
+Optionally, hierarchical clustering can be sought by providing a prior
+clustering of individuals (argument \code{clust}). In such case, clusters will
+be sought within each prior group.
+
+The K-means procedure used in \code{find.clusters} is
+\code{\link[stats]{kmeans}} function from the \code{stats} package. The PCA
+function is \code{\link[ade4]{dudi.pca}} from the \code{ade4} package, except
+for \linkS4class{genlight} objects which use the \code{\link{glPca}} procedure
+from adegenet.
+
+\code{find.clusters} is a generic function with methods for the
following types of objects:\cr
- - \code{data.frame} (only numeric data)\cr
- - \code{matrix} (only numeric data)\cr
- - \code{\linkS4class{genind}} objects (genetic markers)\cr
- - \code{\linkS4class{genlight}} objects (genome-wide SNPs)
-
+ \itemize{
+ \item \code{data.frame} (only numeric data)\cr
+ \item \code{matrix} (only numeric data)\cr
+ \item \code{\linkS4class{genind}} objects (genetic markers)\cr
+ \item \code{\linkS4class{genlight}} objects (genome-wide SNPs)
+ }
}
\usage{
-\method{find.clusters}{data.frame}(x, clust=NULL, n.pca=NULL,
- n.clust=NULL, stat=c("BIC","AIC", "WSS"),
- choose.n.clust=TRUE,criterion=c("diffNgroup",
- "min","goesup", "smoothNgoesup", "goodfit"),
- max.n.clust=round(nrow(x)/10), n.iter=1e5, n.start=10,
- center=TRUE, scale=TRUE, pca.select=c("nbEig","percVar"),
- perc.pca=NULL, \ldots, dudi=NULL)
+
+\method{find.clusters}{data.frame}(x, clust = NULL, n.pca = NULL, n.clust =
+ NULL, method = c("kmeans", "ward"), stat = c("BIC","AIC", "WSS"),
+ choose.n.clust = TRUE, criterion = c("diffNgroup", "min","goesup",
+ "smoothNgoesup", "goodfit"), max.n.clust = round(nrow(x)/10),
+ n.iter = 1e5, n.start = 10, center = TRUE, scale = TRUE,
+ pca.select = c("nbEig","percVar"), perc.pca = NULL, \ldots, dudi =
+ NULL)
\method{find.clusters}{matrix}(x, \ldots)
-\method{find.clusters}{genind}(x, clust=NULL, n.pca=NULL, n.clust=NULL,
- stat=c("BIC","AIC", "WSS"), choose.n.clust=TRUE,
- criterion=c("diffNgroup", "min","goesup", "smoothNgoesup",
- "goodfit"), max.n.clust=round(nrow(x at tab)/10), n.iter=1e5,
- n.start=10, scale=FALSE, truenames=TRUE, \ldots)
-
-\method{find.clusters}{genlight}(x, clust=NULL, n.pca=NULL,
- n.clust=NULL, stat=c("BIC", "AIC",
- "WSS"),choose.n.clust=TRUE, criterion=c("diffNgroup",
- "min","goesup","smoothNgoesup", "goodfit"),
- max.n.clust=round(nInd(x)/10), n.iter=1e5,n.start=10,
- scale=FALSE, pca.select=c("nbEig","percVar"),
- perc.pca=NULL,glPca=NULL, \ldots)
-}
+\method{find.clusters}{genind}(x, clust = NULL, n.pca = NULL, n.clust = NULL,
+ method = c("kmeans", "ward"), stat = c("BIC","AIC", "WSS"),
+ choose.n.clust = TRUE, criterion = c("diffNgroup", "min","goesup",
+ "smoothNgoesup", "goodfit"), max.n.clust = round(nrow(x at tab)/10),
+ n.iter = 1e5, n.start = 10, scale = FALSE, truenames = TRUE,
+ \ldots)
+
+\method{find.clusters}{genlight}(x, clust = NULL, n.pca = NULL, n.clust = NULL,
+ method = c("kmeans", "ward"), stat = c("BIC", "AIC", "WSS"),
+ choose.n.clust = TRUE, criterion = c("diffNgroup",
+ "min","goesup","smoothNgoesup", "goodfit"), max.n.clust =
+ round(nInd(x)/10), n.iter = 1e5,n.start = 10, scale = FALSE,
+ pca.select = c("nbEig","percVar"), perc.pca = NULL,glPca=NULL,
+ \ldots) }
+
\arguments{
+
\item{x}{\code{a data.frame}, \code{matrix}, or \code{\linkS4class{genind}}
object. For the \code{data.frame} and \code{matrix} arguments, only
quantitative variables should be provided.}
- \item{clust}{an optional \code{factor} indicating a prior group membership of
+
+\item{clust}{an optional \code{factor} indicating a prior group membership of
individuals. If provided, sub-clusters will be sought within each prior
group.}
- \item{n.pca}{an \code{integer} indicating the number of axes retained in the
- Principal Component Analysis (PCA) step. If \code{NULL}, interactive selection
- is triggered.}
- \item{n.clust}{ an optinal \code{integer} indicating the number of clusters to
+
+\item{n.pca}{an \code{integer} indicating the number of axes retained in the
+ Principal Component Analysis (PCA) step. If \code{NULL}, interactive
+ selection is triggered.}
+
+\item{n.clust}{ an optinal \code{integer} indicating the number of clusters to
be sought. If provided, the function will only run K-means once, for this
number of clusters. If left as \code{NULL}, several K-means are run for a
range of k (number of clusters) values.}
+
+ \item{method}{a \code{character} string indicating the type of clustering
+ method to be used; "kmeans" (default) uses K-means clustering, and is the
+ original implementation of \code{find.clusters}; "ward" is an alternative
+ which uses Ward's hierarchical clustering; this latter method seems to be more
+ reliable on some simulated datasets, but will be less computer-efficient for
+ large numbers (thousands) of individuals.}
+
\item{stat}{ a \code{character} string matching 'BIC', 'AIC', or 'WSS', which
- indicates the statistic to be computed for each model (i.e., for each value of
- \code{k}). BIC: Bayesian Information Criterion. AIC: Aikaike's Information
- Criterion. WSS: within-groups sum of squares, that is, residual variance.}
- \item{choose.n.clust}{ a \code{logical} indicating whether the number of
+ indicates the statistic to be computed for each model (i.e., for each value
+ of \code{k}). BIC: Bayesian Information Criterion. AIC: Aikaike's
+ Information Criterion. WSS: within-groups sum of squares, that is, residual
+ variance.}
+
+\item{choose.n.clust}{ a \code{logical} indicating whether the number of
clusters should be chosen by the user (TRUE, default), or automatically,
based on a given criterion (argument \code{criterion}). It is HIGHLY
- RECOMMENDED to choose the number of clusters INTERACTIVELY, since
- i) the decrease of the summary statistics (BIC by default) is
- informative, and ii) no criteria for automatic selection is
- appropriate to all cases (see details).}
- \item{criterion}{ a \code{character} string matching "diffNgroup",
- "min","goesup", "smoothNgoesup", or "goodfit", indicating the criterion for automatic
- selection of the optimal number of clusters. See \code{details} for
- an explanation of these procedures.}
- \item{max.n.clust}{ an \code{integer} indicating the maximum number of
- clusters to be tried. Values of 'k' will be picked up between 1 and \code{max.n.clust}}
- \item{n.iter}{ an \code{integer} indicating the number of iterations to be used
+ RECOMMENDED to choose the number of clusters INTERACTIVELY, since i) the
+ decrease of the summary statistics (BIC by default) is informative, and ii)
+ no criteria for automatic selection is appropriate to all cases (see
+ details).}
+
+\item{criterion}{ a \code{character} string matching "diffNgroup",
+ "min","goesup", "smoothNgoesup", or "goodfit", indicating the criterion for
+ automatic selection of the optimal number of clusters. See \code{details}
+ for an explanation of these procedures.}
+
+\item{max.n.clust}{ an \code{integer} indicating the maximum number of clusters
+ to be tried. Values of 'k' will be picked up between 1 and
+ \code{max.n.clust}}
+
+\item{n.iter}{ an \code{integer} indicating the number of iterations to be used
in each run of K-means algorithm. Corresponds to \code{iter.max} of
\code{kmeans} function.}
- \item{n.start}{ an \code{integer} indicating the number of randomly
- chosen starting centroids to be used in each run of the K-means
- algorithm. Using more starting points ensures convergence of the
- algorithm. Corresponds to \code{nstart} of \code{kmeans} function.}
- \item{center}{a \code{logical} indicating whether variables should be centred to
+
+\item{n.start}{ an \code{integer} indicating the number of randomly chosen
+ starting centroids to be used in each run of the K-means algorithm. Using
+ more starting points ensures convergence of the algorithm. Corresponds to
+ \code{nstart} of \code{kmeans} function.}
+
+\item{center}{a \code{logical} indicating whether variables should be centred to
mean 0 (TRUE, default) or not (FALSE). Always TRUE for \linkS4class{genind}
objects.}
- \item{scale}{a \code{logical} indicating whether variables should be
- scaled (TRUE) or not (FALSE, default). Scaling consists in dividing
- variables by their (estimated) standard deviation to account for
- trivial differences in variances. In allele frequencies, it comes with
- the risk of giving uninformative alleles more importance while
- downweighting informative alleles. Further scaling options are
- available for \linkS4class{genind} objects (see argument
- \code{scale.method}).}
- \item{pca.select}{a \code{character} indicating the mode of selection of PCA
- axes, matching either "nbEig" or "percVar". For "nbEig", the user
- has to specify the number of axes retained (interactively, or via
+
+\item{scale}{a \code{logical} indicating whether variables should be scaled
+ (TRUE) or not (FALSE, default). Scaling consists in dividing variables by
+ their (estimated) standard deviation to account for trivial differences in
+ variances. In allele frequencies, it comes with the risk of giving
+ uninformative alleles more importance while downweighting informative
+ alleles. Further scaling options are available for \linkS4class{genind}
+ objects (see argument \code{scale.method}).}
+
+\item{pca.select}{a \code{character} indicating the mode of selection of PCA
+ axes, matching either "nbEig" or "percVar". For "nbEig", the user has to
+ specify the number of axes retained (interactively, or via
\code{n.pca}). For "percVar", the user has to specify the minimum amount of
the total variance to be preserved by the retained axes, expressed as a
percentage (interactively, or via \code{perc.pca}). }
- \item{perc.pca}{a \code{numeric} value between 0 and 100 indicating the
- minimal percentage of the total variance of the data to be expressed by the
- retained axes of PCA.}
- \item{truenames}{a \code{logical} indicating whether true (i.e., user-specified)
- labels should be used in object outputs (TRUE, default) or not
- (FALSE), in which case generic labels are used.}
- \item{\ldots}{further arguments to be passed to other functions. For
+
+\item{perc.pca}{a \code{numeric} value between 0 and 100 indicating the minimal
+ percentage of the total variance of the data to be expressed by the retained
+ axes of PCA.}
+
+\item{truenames}{a \code{logical} indicating whether true (i.e., user-specified)
+ labels should be used in object outputs (TRUE, default) or not (FALSE), in
+ which case generic labels are used.}
+
+\item{\ldots}{further arguments to be passed to other functions. For
\code{find.clusters.matrix}, arguments are to match those of the
\code{data.frame} method.}
- \item{dudi}{optionally, a multivariate analysis with the class
- \code{dudi} (from the ade4 package). If provided, prior PCA will be
- ignored, and this object will be used as a prior step for variable
- orthogonalisation.}
- \item{glPca}{an optional \code{\link{glPca}} object; if provided,
- dimension reduction is not performed (saving computational time) but
- taken directly from this object.}
+
+\item{dudi}{optionally, a multivariate analysis with the class \code{dudi} (from
+ the ade4 package). If provided, prior PCA will be ignored, and this object
+ will be used as a prior step for variable orthogonalisation.}
+
+\item{glPca}{an optional \code{\link{glPca}} object; if provided, dimension
+ reduction is not performed (saving computational time) but taken directly
+ from this object.}
+
}
\details{
=== ON THE SELECTION OF K ===\cr
@@ -272,9 +303,10 @@ mtext(3, tex="'X' indicates the actual number of clusters")
x <- glSim(100,500,500)
x
plot(x)
-grp <- find.clusters(x, n.pca=100, choose=FALSE, stat="BIC")
-plot(grp$Kstat, type="o", xlab="number of clusters (K)",
-ylab="BIC",main="find.clusters on a genlight object\n(two groups)")
+grp <- find.clusters(x, n.pca = 100, choose = FALSE, stat = "BIC")
+plot(grp$Kstat, type = "o", xlab = "number of clusters (K)",
+ ylab = "BIC",
+ main = "find.clusters on a genlight object\n(two groups)")
}
}
\keyword{multivariate}
diff --git a/man/genind2df.Rd b/man/genind2df.Rd
index 8496a80..0dda4d4 100644
--- a/man/genind2df.Rd
+++ b/man/genind2df.Rd
@@ -45,12 +45,11 @@ genind2df(obj)
genind2df(obj, sep="/")
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{df2genind}}, \code{\link{import2genind}}, \code{\link{read.genetix}},
\code{\link{read.fstat}}, \code{\link{read.structure}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/genind2genpop.Rd b/man/genind2genpop.Rd
index 02e2404..6046427 100644
--- a/man/genind2genpop.Rd
+++ b/man/genind2genpop.Rd
@@ -90,13 +90,12 @@ summary(genind2genpop(microbov, ~coun/spe)) # Conversion based on country and sp
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\linkS4class{genind}, \linkS4class{genpop}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{classes}
\keyword{manip}
\keyword{multivariate}
-
diff --git a/man/genlight.Rd b/man/genlight.Rd
index 41552c4..4a71585 100644
--- a/man/genlight.Rd
+++ b/man/genlight.Rd
@@ -7,6 +7,7 @@
\alias{[,genlight,ANY,ANY,ANY-method}
\alias{initialize,genlight-method}
\alias{show,genlight-method}
+\alias{tab,genlight-method}
\alias{nLoc,genlight-method}
\alias{nInd,genlight-method}
\alias{dim,genlight-method}
@@ -151,6 +152,13 @@
used to access the content of slots of the object.}
\item{$<-}{\code{signature(x = "genlight")}: similar to the @ operator;
used to replace the content of slots of the object.}
+
+ \item{tab}{\code{signature(x = "genlight")}: returns a table of
+ allele counts (see \code{\link{tab}}; additional arguments are
+ \code{freq}, a logical stating if relative frequencies should be
+ returned (use for varying ploidy), and \code{NA.method}, a character
+ indicating if missing values should be replaced by the mean
+ frequency("mean"), or left as is ("asis").}
\item{nInd}{\code{signature(x = "genlight")}: returns the number of
individuals in the object.}
diff --git a/man/glAux.Rd b/man/glAux.Rd
index 6958492..e245aaa 100644
--- a/man/glAux.Rd
+++ b/man/glAux.Rd
@@ -32,7 +32,7 @@ glNA(x, alleleAsUnit = TRUE)
glMean(x, alleleAsUnit = TRUE)
glVar(x, alleleAsUnit = TRUE)
glDotProd(x, center = FALSE, scale = FALSE, alleleAsUnit = FALSE,
- parallel = require("parallel"), n.cores = NULL)
+ parallel = FALSE, n.cores = NULL)
}
\arguments{
\item{x}{a \linkS4class{genlight} object}
diff --git a/man/glPca.Rd b/man/glPca.Rd
index c5983d0..80aa308 100644
--- a/man/glPca.Rd
+++ b/man/glPca.Rd
@@ -27,7 +27,7 @@
}
\usage{
glPca(x, center = TRUE, scale = FALSE, nf = NULL, loadings = TRUE,
- alleleAsUnit = FALSE, useC = TRUE, parallel = require("parallel"),
+ alleleAsUnit = FALSE, useC = TRUE, parallel = FALSE,
n.cores = NULL, returnDotProd=FALSE, matDotProd=NULL)
\method{print}{glPca}(x, \dots)
diff --git a/man/hierarchy-methods.Rd b/man/hierarchy-methods.Rd
index 1fb7c67..0a63d7a 100644
--- a/man/hierarchy-methods.Rd
+++ b/man/hierarchy-methods.Rd
@@ -70,11 +70,10 @@ hier(microbov) <- ~Species/Breed
head(hier(microbov, ~Species/Breed))
}
-\author{
-Zhian N. Kamvar
-}
\seealso{
\code{\link{strata}} \code{\link{genind}}
\code{\link{as.genind}}
}
-
+\author{
+Zhian N. Kamvar
+}
diff --git a/man/hybridize.Rd b/man/hybridize.Rd
index eddb1d7..c328cbc 100644
--- a/man/hybridize.Rd
+++ b/man/hybridize.Rd
@@ -95,10 +95,9 @@ add.scatter.eig(coa1$eig,2,1,2)
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{seploc}}, \code{\link{seppop}}, \code{\link{repool}}
}
-
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
diff --git a/man/hybridtoy.Rd b/man/hybridtoy.Rd
new file mode 100644
index 0000000..b8f8209
--- /dev/null
+++ b/man/hybridtoy.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasets.R
+\docType{data}
+\name{hybridtoy}
+\alias{hybridtoy}
+\title{Toy hybrid dataset}
+\format{a \linkS4class{genind} object}
+\description{
+Toy hybrid dataset
+}
+\examples{
+data(hybridtoy)
+x <- hybridtoy
+pca1 <- dudi.pca(tab(x), scannf=FALSE, scale=FALSE)
+s.class(pca1$li, pop(x))
+
+if(require(ggplot2)) {
+p <- ggplot(pca1$li, aes(x=Axis1)) +
+ geom_density(aes(fill=pop(x)), alpha=.4, adjust=1) +
+ geom_point(aes(y=0, color=pop(x)), pch="|", size=10, alpha=.5)
+p
+}
+
+## kmeans
+km <- find.clusters(x, n.pca=10, n.clust=2)
+table(pop(x), km$grp)
+
+## dapc
+dapc1 <- dapc(x, pop=km$grp, n.pca=10, n.da=1)
+scatter(dapc1)
+scatter(dapc1, grp=pop(x))
+compoplot(dapc1, col.pal=spectral, n.col=2)
+
+## ML-EM with hybrids
+res <- snapclust(x, k=2, hybrids=TRUE, detailed=TRUE)
+compoplot(res, n.col=3)
+table(res$group, pop(x))
+
+}
+\author{
+Data simulated by Marie-Pauline Beugin. Example by Thibaut Jombart.
+}
diff --git a/man/import2genind.Rd b/man/import2genind.Rd
index 6944ba0..8af3b54 100644
--- a/man/import2genind.Rd
+++ b/man/import2genind.Rd
@@ -53,9 +53,6 @@ import2genind(system.file("files/nancycats.str",
package="adegenet"), onerowperind=FALSE, n.ind=237, n.loc=9, col.lab=1, col.pop=2, ask=FALSE)
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Belkhir K., Borsa P., Chikhi L., Raufaste N. & Bonhomme F.
(1996-2004) GENETIX 4.05, logiciel sous Windows TM pour la genetique des
@@ -81,5 +78,7 @@ data analysis: a survival guide \emph{Nature}, \bold{7}: 745-758
\code{\link{read.fstat}}, \code{\link{read.structure}},
\code{\link{read.genepop}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/makefreq.Rd b/man/makefreq.Rd
index 9aeb140..f0921e0 100644
--- a/man/makefreq.Rd
+++ b/man/makefreq.Rd
@@ -71,12 +71,11 @@ add.scatter.eig(pca1$eig,nf=2,xax=1,yax=2,posi="top")
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{genpop}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
\keyword{multivariate}
-
diff --git a/man/microbov.Rd b/man/microbov.Rd
index 69f265c..bf015da 100644
--- a/man/microbov.Rd
+++ b/man/microbov.Rd
@@ -100,4 +100,3 @@ Multiple Co-Inertia Analysis. \emph{Genetics Selection Evolution}.
\bold{39}: 545--567.
}
\keyword{datasets}
-
diff --git a/man/minorAllele.Rd b/man/minorAllele.Rd
index 08dbb45..b05d017 100644
--- a/man/minorAllele.Rd
+++ b/man/minorAllele.Rd
@@ -28,10 +28,9 @@ m.freq
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{isPoly}}
}
-
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
diff --git a/man/nancycats.Rd b/man/nancycats.Rd
index da2cdd2..f02c924 100644
--- a/man/nancycats.Rd
+++ b/man/nancycats.Rd
@@ -56,4 +56,3 @@ habitat using: not all colonies are equal. submitted to \emph{Molecular
Ecology}
}
\keyword{datasets}
-
diff --git a/man/new.genind.Rd b/man/new.genind.Rd
index 20fa4a1..3c9f794 100644
--- a/man/new.genind.Rd
+++ b/man/new.genind.Rd
@@ -2,10 +2,10 @@
% Please edit documentation in R/constructors.R
\docType{methods}
\name{initialize,genind-method}
-\alias{as.genind}
-\alias{genind}
\alias{initialize,genind-method}
\alias{initialize,genind-methods}
+\alias{genind}
+\alias{as.genind}
\title{genind constructor}
\usage{
\S4method{initialize}{genind}(.Object, tab, pop = NULL, prevcall = NULL,
@@ -59,4 +59,3 @@ Most users do not need using the constructor, but merely to convert raw allele d
\seealso{
the description of the \linkS4class{genind} class; \code{\link{df2genind}}
}
-
diff --git a/man/new.genpop.Rd b/man/new.genpop.Rd
index 98337b7..5ad3fb0 100644
--- a/man/new.genpop.Rd
+++ b/man/new.genpop.Rd
@@ -2,10 +2,10 @@
% Please edit documentation in R/constructors.R
\docType{methods}
\name{initialize,genpop-method}
-\alias{as.genpop}
-\alias{genpop}
\alias{initialize,genpop-method}
\alias{initialize,genpop-methods}
+\alias{genpop}
+\alias{as.genpop}
\title{genpop constructor}
\usage{
\S4method{initialize}{genpop}(.Object, tab, prevcall = NULL, ploidy = 2L,
@@ -42,4 +42,3 @@ Most users do not need using the constructor, but merely to convert raw allele d
\seealso{
the description of the \linkS4class{genpop} class; \code{\link{df2genind}} and related functions for reading raw allele data
}
-
diff --git a/man/old2new.Rd b/man/old2new.Rd
index c3d3966..18205bc 100644
--- a/man/old2new.Rd
+++ b/man/old2new.Rd
@@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/old2new.R
\name{old2new_genind}
-\alias{old2new}
\alias{old2new_genind}
+\alias{old2new}
\alias{old2new_genlight}
\alias{old2new_genpop}
\title{Convert objects with obsolete classes into new objects}
@@ -30,4 +30,3 @@ Thibaut Jombart \email{t.jombart at imperial.ac.uk}\cr
Zhian N. Kamvar \email{kamvarz at science.oregonstate.edu}
}
\keyword{manip}
-
diff --git a/man/population-methods.Rd b/man/population-methods.Rd
index 5d2be70..4d751b8 100644
--- a/man/population-methods.Rd
+++ b/man/population-methods.Rd
@@ -49,4 +49,3 @@ head(pop(microbov.old))
\author{
Zhian N. Kamvar
}
-
diff --git a/man/read.PLINK.Rd b/man/read.PLINK.Rd
index df40cc5..e918b3a 100644
--- a/man/read.PLINK.Rd
+++ b/man/read.PLINK.Rd
@@ -78,10 +78,7 @@ the '.raw' format.
Data need to be exported from PLINK using the option "--recodeA" (and NOT
"--recodeAD"). The PLINK command should therefore look like: \code{plink
--file data --recodeA}. For more information on this topic, please look at
-this webpage: \url{http://pngu.mgh.harvard.edu/~purcell/plink/dataman.shtml}
-}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+this webpage: \url{http://zzz.bwh.harvard.edu/plink/}
}
\seealso{
- \code{?genlight} for a description of the class
@@ -100,5 +97,7 @@ format.
- another function \code{read.plink} is available in the package
\code{snpMatrix}.
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/read.fstat.Rd b/man/read.fstat.Rd
index 9cb0f95..2e2499d 100644
--- a/man/read.fstat.Rd
+++ b/man/read.fstat.Rd
@@ -32,9 +32,6 @@ obj <- read.fstat(system.file("files/nancycats.dat",package="adegenet"))
obj
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Fstat (version 2.9.3). Software by Jerome Goudet.
http://www2.unil.ch/popgen/softwares/fstat.htm\cr
@@ -44,5 +41,7 @@ http://www2.unil.ch/popgen/softwares/fstat.htm\cr
\code{\link{read.genetix}}, \code{\link{read.structure}},
\code{\link{read.genepop}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/read.genepop.Rd b/man/read.genepop.Rd
index 7018c2b..3b6a796 100644
--- a/man/read.genepop.Rd
+++ b/man/read.genepop.Rd
@@ -34,9 +34,6 @@ obj <- read.genepop(system.file("files/nancycats.gen",package="adegenet"))
obj
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Raymond M. & Rousset F, (1995). GENEPOP (version 1.2):
population genetics software for exact tests and ecumenicism. \emph{J.
@@ -47,5 +44,7 @@ Heredity}, \bold{86}:248-249 \cr
\code{\link{read.fstat}}, \code{\link{read.structure}},
\code{\link{read.genetix}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/read.genetix.Rd b/man/read.genetix.Rd
index e5bf2e9..b7ab6a7 100644
--- a/man/read.genetix.Rd
+++ b/man/read.genetix.Rd
@@ -32,9 +32,6 @@ obj <- read.genetix(system.file("files/nancycats.gtx",package="adegenet"))
obj
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Belkhir K., Borsa P., Chikhi L., Raufaste N. & Bonhomme F.
(1996-2004) GENETIX 4.05, logiciel sous Windows TM pour la genetique des
@@ -46,5 +43,7 @@ Universite de Montpellier II, Montpellier (France). \cr
\code{\link{read.fstat}}, \code{\link{read.structure}},
\code{\link{read.genepop}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/read.snp.Rd b/man/read.snp.Rd
index a5c4062..482ad98 100644
--- a/man/read.snp.Rd
+++ b/man/read.snp.Rd
@@ -4,8 +4,8 @@
\alias{read.snp}
\title{Reading Single Nucleotide Polymorphism data}
\usage{
-read.snp(file, quiet = FALSE, chunkSize = 1000,
- parallel = require("parallel"), n.cores = NULL, ...)
+read.snp(file, quiet = FALSE, chunkSize = 1000, parallel = FALSE,
+ n.cores = NULL, ...)
}
\arguments{
\item{file}{a character string giving the path to the file to convert, with
@@ -77,9 +77,6 @@ locNames(obj)
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
- \code{?genlight} for a description of the class
\code{"\linkS4class{genlight}"}.
@@ -95,5 +92,7 @@ format.
- \code{\link{import2genind}}: read multiallelic markers from various
software into adegenet.\cr
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/read.structure.Rd b/man/read.structure.Rd
index 4099186..c7c99ed 100644
--- a/man/read.structure.Rd
+++ b/man/read.structure.Rd
@@ -73,9 +73,6 @@ obj <- read.structure(system.file("files/nancycats.str",package="adegenet"),
obj
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Pritchard, J.; Stephens, M. & Donnelly, P. (2000) Inference of
population structure using multilocus genotype data. \emph{Genetics},
@@ -86,5 +83,7 @@ population structure using multilocus genotype data. \emph{Genetics},
\code{\link{read.fstat}}, \code{\link{read.genetix}},
\code{\link{read.genepop}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{manip}
-
diff --git a/man/repool.Rd b/man/repool.Rd
index 1e330c4..e41630d 100644
--- a/man/repool.Rd
+++ b/man/repool.Rd
@@ -38,10 +38,9 @@ nastyCattle
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\seealso{
\code{\link{seploc}}, \code{\link{seppop}}
}
-
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
diff --git a/man/rupica.Rd b/man/rupica.Rd
index eba1d1c..5f15b77 100644
--- a/man/rupica.Rd
+++ b/man/rupica.Rd
@@ -77,4 +77,3 @@ rupicapra}): a consequence of landscape features and social factors?
submitted to \emph{Molecular Ecology}.
}
\keyword{datasets}
-
diff --git a/man/scaleGen.Rd b/man/scaleGen.Rd
index cdb06e9..81b0828 100644
--- a/man/scaleGen.Rd
+++ b/man/scaleGen.Rd
@@ -3,9 +3,11 @@
\docType{methods}
\name{scaleGen}
\alias{scaleGen}
+\alias{scaleGen-methods}
+\alias{scaleGen,genind-method}
+\alias{scaleGen,genpop-method}
\alias{scaleGen,genind-method}
\alias{scaleGen,genpop-method}
-\alias{scaleGen-methods}
\title{Compute scaled allele frequencies}
\usage{
scaleGen(x, ...)
@@ -90,4 +92,3 @@ Thibaut Jombart \email{t.jombart at imperial.ac.uk}
}
\keyword{manip}
\keyword{methods}
-
diff --git a/man/seploc.Rd b/man/seploc.Rd
index ee516f6..bf04be2 100644
--- a/man/seploc.Rd
+++ b/man/seploc.Rd
@@ -20,7 +20,7 @@
\S4method{seploc}{genind}(x,truenames=TRUE,res.type=c("genind","matrix"))
\S4method{seploc}{genpop}(x,truenames=TRUE,res.type=c("genpop","matrix"))
\S4method{seploc}{genlight}(x, n.block=NULL, block.size=NULL, random=FALSE,
- parallel=require(parallel), n.cores=NULL)
+ parallel=FALSE, n.cores=NULL)
}
\arguments{
\item{x}{a \linkS4class{genind} or a \linkS4class{genpop} object.}
diff --git a/man/showmekittens.Rd b/man/showmekittens.Rd
new file mode 100644
index 0000000..b2651ec
--- /dev/null
+++ b/man/showmekittens.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/showmekittens.R
+\name{showmekittens}
+\alias{showmekittens}
+\title{When you need a break...}
+\usage{
+showmekittens(x = NULL, list = FALSE)
+}
+\arguments{
+\item{x}{the name or index of the video to display; if NULL, a random video is chosen}
+
+\item{list}{a logical indicating if the list of available videos should be displayed}
+}
+\description{
+Genetic data analysis can be a harsh, tiring, daunting task.
+Sometimes, a mere break will not cut it.
+Sometimes, you need a kitten.
+}
+\details{
+Please send us more! Either pull request or submit an issue with a URL (use
+\code{adegenetIssues()}).
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/sim2pop.Rd b/man/sim2pop.Rd
index fe2c9b5..1eeb347 100644
--- a/man/sim2pop.Rd
+++ b/man/sim2pop.Rd
@@ -53,13 +53,12 @@ legend("topright",leg=c("Pop A", "Pop B"),pch=c(17,19))
}
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Balloux F (2001) Easypop (version 1.7): a computer program for
oppulation genetics simulations \emph{Journal of Heredity}, \bold{92}:
301-302
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{datasets}
-
diff --git a/man/snapclust.Rd b/man/snapclust.Rd
new file mode 100644
index 0000000..5828c43
--- /dev/null
+++ b/man/snapclust.Rd
@@ -0,0 +1,140 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/snapclust.R
+\name{snapclust}
+\alias{snapclust}
+\title{Maximum-likelihood genetic clustering using EM algorithm}
+\usage{
+snapclust(x, k, pop.ini = "ward", max.iter = 100, n.start = 10,
+ n.start.kmeans = 50, hybrids = FALSE, dim.ini = 100,
+ hybrid.coef = NULL, parent.lab = c("A", "B"), ...)
+}
+\arguments{
+\item{x}{a \linkS4class{genind} object}
+
+\item{k}{the number of clusters to look for}
+
+\item{pop.ini}{parameter indicating how the initial group membership should
+be found. If \code{NULL}, groups are chosen at random, and the algorithm
+will be run \code{n.start times}. If "kmeans", then the function
+\code{find.clusters} is used to define initial groups using the K-means
+algorithm. If "ward", then the function \code{find.clusters} is used to
+define initial groups using the Ward algorithm. Alternatively, a factor
+defining the initial cluster configuration can be provided.}
+
+\item{max.iter}{the maximum number of iteration of the EM algorithm}
+
+\item{n.start}{the number of times the EM algorithm is run, each time with
+different random starting conditions}
+
+\item{n.start.kmeans}{the number of times the K-means algorithm is run to
+define the starting point of the ML-EM algorithm, each time with
+different random starting conditions}
+
+\item{hybrids}{a logical indicating if hybrids should be modelled
+explicitely; this is currently implemented for 2 groups only.}
+
+\item{dim.ini}{the number of PCA axes to retain in the dimension reduction
+step for \code{\link{find.clusters}}, if this method is used to define
+initial group memberships (see argument \code{pop.ini}).}
+
+\item{hybrid.coef}{a vector of hybridization coefficients, defining the
+proportion of hybrid gene pool coming from the first parental population;
+this is symmetrized around 0.5, so that e.g. c(0.25, 0.5) will be
+converted to c(0.25, 0.5, 0.75)}
+
+\item{parent.lab}{a vector of 2 character strings used to label the two
+parental populations; only used if hybrids are detected (see argument
+\code{hybrids})}
+
+\item{...}{further arguments passed on to \code{\link{find.clusters}}}
+}
+\value{
+The function \code{snapclust} returns a list with the following
+components:
+\itemize{
+
+\item \code{$group} a factor indicating the maximum-likelihood assignment of
+individuals to groups; if identified, hybrids are labelled after
+hybridization coefficients, e.g. 0.5_A - 0.5_B for F1, 0.75_A - 0.25_B for
+backcross F1 / A, etc.
+
+\item \code{$ll}: the log-likelihood of the model
+
+\item \code{$proba}: a matrix of group membership probabilities, with
+individuals in rows and groups in columns; each value correspond to the
+probability that a given individual genotype was generated under a given
+group, under Hardy-Weinberg hypotheses.
+
+\item \code{$converged} a logical indicating if the algorithm converged; if
+FALSE, it is doubtful that the result is an actual Maximum Likelihood
+estimate.
+
+\item \code{$n.iter} an integer indicating the number of iterations the EM
+algorithm was run for.
+
+}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\examples{
+\dontrun{
+data(microbov)
+
+## try function using k-means initialization
+grp.ini <- find.clusters(microbov, n.clust=15, n.pca=150)
+
+## run EM algo
+res <- snapclust(microbov, 15, pop.ini = grp.ini$grp)
+names(res)
+res$converged
+res$n.iter
+
+## plot result
+compoplot(res)
+
+## flag potential hybrids
+to.flag <- apply(res$proba,1,max)<.9
+compoplot(res, subset=to.flag, show.lab=TRUE,
+ posi="bottomleft", bg="white")
+
+
+## Simulate hybrids F1
+zebu <- microbov[pop="Zebu"]
+salers <- microbov[pop="Salers"]
+hyb <- hybridize(zebu, salers, n=30)
+x <- repool(zebu, salers, hyb)
+
+## method without hybrids
+res.no.hyb <- snapclust(x, k=2, hybrids=FALSE)
+compoplot(res.no.hyb, col.pal=spectral, n.col=2)
+
+## method with hybrids
+res.hyb <- snapclust(x, k=2, hybrids=TRUE)
+compoplot(res.hyb, col.pal =
+ hybridpal(col.pal = spectral), n.col = 2)
+
+
+## Simulate hybrids backcross (F1 / parental)
+f1.zebu <- hybridize(hyb, zebu, 20, pop = "f1.zebu")
+f1.salers <- hybridize(hyb, salers, 25, pop = "f1.salers")
+y <- repool(x, f1.zebu, f1.salers)
+
+## method without hybrids
+res2.no.hyb <- snapclust(y, k = 2, hybrids = FALSE)
+compoplot(res2.no.hyb, col.pal = hybridpal(), n.col = 2)
+
+## method with hybrids F1 only
+res2.hyb <- snapclust(y, k = 2, hybrids = TRUE)
+compoplot(res2.hyb, col.pal = hybridpal(), n.col = 2)
+
+## method with back-cross
+res2.back <- snapclust(y, k = 2, hybrids = TRUE, hybrid.coef = c(.25,.5))
+ compoplot(res2.hyb, col.pal = hybridpal(), n.col = 2)
+
+}
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com} and Marie-Pauline
+Beugin
+}
diff --git a/man/snapclust.choose.k.Rd b/man/snapclust.choose.k.Rd
new file mode 100644
index 0000000..5903dc7
--- /dev/null
+++ b/man/snapclust.choose.k.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/snapclust.choose.k.R
+\name{snapclust.choose.k}
+\alias{snapclust.choose.k}
+\title{Choose the number of clusters for snapclust using BIC}
+\usage{
+snapclust.choose.k(max, ..., IC = AIC, IC.only = TRUE)
+}
+\arguments{
+\item{max}{An integer indicating the maximum number of clusters to seek;
+\code{\link{snapclust}} will be run for all k from 2 to max.}
+
+\item{...}{Arguments passed to \code{\link{snapclust}}.}
+
+\item{IC}{A function computing the information criterion for
+\code{\link{snapclust}} objects. Available statistics are
+\code{AIC} (default), \code{AICc}, and \code{BIC}.}
+
+\item{IC.only}{A logical (TRUE by default) indicating if IC values only
+should be returned; if \code{FALSE}, full \code{snapclust} objects are
+returned.}
+}
+\description{
+Do not use. We work on that stuff. Contact us if interested.
+}
+\seealso{
+\code{\link{snapclust}} to generate individual clustering solutions,
+and \code{\link{BIC.snapclust}} for computing BIC for \code{snapclust}
+objects.
+}
+\author{
+Thibaut Jombart \email{thibautjombart at gmail.com}
+}
diff --git a/man/spca.Rd b/man/spca.Rd
index 45de115..296b3c1 100644
--- a/man/spca.Rd
+++ b/man/spca.Rd
@@ -1,43 +1,94 @@
\encoding{UTF-8}
\name{spca}
\alias{spca}
+\alias{spca.default}
+\alias{spca.matrix}
+\alias{spca.data.frame}
+\alias{spca.genind}
+\alias{spca.genpop}
\alias{print.spca}
\alias{summary.spca}
\alias{plot.spca}
\alias{screeplot.spca}
\alias{colorplot.spca}
\title{Spatial principal component analysis}
-\description{These functions are designed to perform a spatial principal
- component analysis and to display the results. They call upon
- \code{multispati} from the \code{ade4} package.
-
- \code{spca} performs the spatial component analysis. Other
- functions are:\cr
-
- - \code{print.spca}: prints the spca content\cr
+\description{
+
+ These functions implement the spatial principal component analysis
+ (sPCA). The function \code{spca} is a generic with methods for:
+ \itemize{
+ \item \code{matrix}: only numeric values are accepted
+ \item \code{data.frame}: same as for matrices
+ \item \code{genind}: any \linkS4class{genind} object is accepted
+ \item \code{genpop}: any \linkS4class{genpop} object is accepted
+ }
- - \code{summary.spca}: gives variance and autocorrelation\cr
- statistics
+ The core computation use \code{multispati} from the \code{ade4} package.\cr
- - \code{plot.spca}: usefull graphics (connection network, 3 different
- representations of map of scores, eigenvalues barplot and
- decomposition)\cr
+ Besides the set of \code{spca} functions, other functions include:
+ \itemize{
+
+ \item \code{print.spca}: prints the spca content
+
+ \item \code{summary.spca}: gives variance and autocorrelation
+ statistics
+
+ \item \code{plot.spca}: usefull graphics (connection network, 3 different
+ representations of map of scores, eigenvalues barplot and
+ decomposition)
- - \code{screeplot.spca}: decomposes spca eigenvalues into variance and
- autocorrelation\cr
+ \item \code{screeplot.spca}: decomposes spca eigenvalues into variance and
+ autocorrelation
+
+ \item \code{colorplot.spca}: represents principal components of sPCA in
+ space using the RGB system.
+
+ }
- - \code{colorplot.spca}: represents principal components of sPCA in
- space using the RGB system.\cr
-
- A tutorial describes how to perform a sPCA: see
- \url{http://adegenet.r-forge.r-project.org/files/tutorial-spca.pdf} or
- type \code{adegenetTutorial(which="spca")}.
+ A tutorial on sPCA can be opened using:\cr
+ \code{adegenetTutorial(which="spca")}.
}
\usage{
-spca(obj, xy=NULL, cn=NULL, matWeight=NULL,
- scale=FALSE, scannf=TRUE, nfposi=1, nfnega=1,
- type=NULL, ask=TRUE, plot.nb=TRUE, edit.nb=FALSE,
- truenames=TRUE, d1=NULL, d2=NULL, k=NULL, a=NULL, dmin=NULL)
+spca(...)
+
+\method{spca}{default}(x, ...)
+
+\method{spca}{matrix}(x, xy = NULL, cn = NULL, matWeight = NULL,
+ center = TRUE, scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...)
+
+\method{spca}{data.frame}(x, xy = NULL, cn = NULL, matWeight = NULL,
+ center = TRUE, scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...)
+
+\method{spca}{genind}(obj, xy = NULL, cn = NULL, matWeight = NULL,
+ scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...)
+
+\method{spca}{genpop}(obj, xy = NULL, cn = NULL, matWeight = NULL,
+ scale = FALSE, scannf = TRUE,
+ nfposi = 1, nfnega = 1,
+ type = NULL, ask = TRUE,
+ plot.nb = TRUE, edit.nb = FALSE,
+ truenames = TRUE,
+ d1 = NULL, d2 = NULL, k = NULL,
+ a = NULL, dmin = NULL, ...)
+
\method{print}{spca}(x, \dots)
@@ -50,61 +101,95 @@ spca(obj, xy=NULL, cn=NULL, matWeight=NULL,
\method{colorplot}{spca}(x, axes=1:ncol(x$li), useLag=FALSE, \dots)
}
\arguments{
+
+ \item{x}{a \code{matrix} or a \code{data.frame} of numeric values,
+ with individuals in rows and variables in columns; categorical
+ variables with a binary coding are acceptable too; for \code{print}
+ and plotting functions, a spca object.}
+
\item{obj}{a \code{genind} or \code{genpop} object.}
+
\item{xy}{a matrix or data.frame with two columns for x and y
coordinates. Seeked from obj\$other\$xy if it exists when xy is not
provided. Can be NULL if a \code{nb} object is provided in
- \code{cn}.\cr
- Longitude/latitude coordinates should be converted first by a given
- projection (see 'See Also' section).}
+ \code{cn}.\cr Longitude/latitude coordinates should be converted
+ first by a given projection (see 'See Also' section).}
+
\item{cn}{a connection network of the class 'nb' (package spdep). Can
be NULL if xy is provided. Can be easily obtained using the function
chooseCN (see details).}
+
\item{matWeight}{a square matrix of spatial weights, indicating the
spatial proximities between entities. If provided, this argument
prevails over \code{cn} (see details).}
- \item{scale}{a logical indicating whether alleles should be scaled to
- unit variance (TRUE) or not (FALSE, default).}
+
+ \item{center}{a logical indicating whether data should be centred to
+ a mean of zero; used implicitely for \linkS4class{genind} or
+ \linkS4class{genpop} objects.}
+
+ \item{scale}{a logical indicating whether data should be scaled to
+ unit variance (TRUE) or not (FALSE, default).}
+
\item{scannf}{a logical stating whether eigenvalues should be chosen
interactively (TRUE, default) or not (FALSE).}
- \item{nfposi}{an integer giving the number of positive eigenvalues retained
- ('global structures').}
- \item{nfnega}{an integer giving the number of negative eigenvalues retained
- ('local structures').}
+
+ \item{nfposi}{an integer giving the number of positive eigenvalues
+ retained ('global structures').}
+
+ \item{nfnega}{an integer giving the number of negative eigenvalues
+ retained ('local structures').}
+
\item{type}{an integer giving the type of graph (see details in
- \code{chooseCN} help page). If provided, \code{ask} is set to FALSE.}
+ \code{chooseCN} help page). If provided, \code{ask} is set to
+ FALSE.}
+
\item{ask}{a logical stating whether graph should be chosen
interactively (TRUE,default) or not (FALSE).}
+
\item{plot.nb}{a logical stating whether the resulting graph should be
- plotted (TRUE, default) or not (FALSE).}
+ plotted (TRUE, default) or not (FALSE).}
+
\item{edit.nb}{a logical stating whether the resulting graph should be
- edited manually for corrections (TRUE) or not (FALSE, default).}
+ edited manually for corrections (TRUE) or not (FALSE, default).}
+
\item{truenames}{a logical stating whether true names should be used
for 'obj' (TRUE, default) instead of generic labels (FALSE)}
+
\item{d1}{the minimum distance between any two neighbours. Used if
\code{type=5.}}
+
\item{d2}{the maximum distance between any two neighbours. Used if
\code{type=5}.}
- \item{k}{the number of neighbours per point. Used if
- \code{type=6}.}
+
+ \item{k}{the number of neighbours per point. Used if \code{type=6}.}
+
\item{a}{the exponent of the inverse distance matrix. Used if
\code{type=7}.}
+
\item{dmin}{the minimum distance between any two distinct points. Used
to avoid infinite spatial proximities (defined as the inversed
spatial distances). Used if \code{type=7}.}
- \item{x}{a \code{spca} object.}
+
\item{object}{a \code{spca} object.}
+
\item{printres}{a logical stating whether results should be printed on
the screen (TRUE, default) or not (FALSE).}
+
\item{axis}{an integer between 1 and (nfposi+nfnega) indicating which
axis should be plotted.}
+
\item{main}{a title for the screeplot; if NULL, a default one is
used.}
+
\item{\dots}{further arguments passed to other methods.}
+
\item{axes}{the index of the columns of X to be represented. Up to
three axes can be chosen.}
+
\item{useLag}{a logical stating whether the lagged components
- (\code{x\$ls}) should be used instead of the components (\code{x\$li}).}
+ (\code{x\$ls}) should be used instead of the components
+ (\code{x\$li}).}
+
}
\details{
The spatial principal component analysis (sPCA) is designed to
diff --git a/man/spcaIllus.Rd b/man/spcaIllus.Rd
index 9c8a428..b303b22 100644
--- a/man/spcaIllus.Rd
+++ b/man/spcaIllus.Rd
@@ -111,9 +111,6 @@ detach(spcaIllus)
}
-\author{
-Thibaut Jombart \email{t.jombart at imperial.ac.uk}
-}
\references{
Jombart, T., Devillard, S., Dufour, A.-B. and Pontier, D.
Revealing cryptic spatial patterns in genetic variability by a new
@@ -125,6 +122,8 @@ genetics simulations \emph{Journal of Heredity}, \bold{92}: 301-302
\seealso{
\code{\link{spca}}
}
+\author{
+Thibaut Jombart \email{t.jombart at imperial.ac.uk}
+}
\keyword{datasets}
\keyword{spatial}
-
diff --git a/man/spca_randtest.Rd b/man/spca_randtest.Rd
new file mode 100644
index 0000000..819fbc3
--- /dev/null
+++ b/man/spca_randtest.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spca_randtest.R
+\name{spca_randtest}
+\alias{spca_randtest}
+\title{Monte Carlo test for sPCA}
+\usage{
+spca_randtest(x, nperm = 499)
+}
+\arguments{
+\item{x}{A \code{\link{spca}} object.}
+
+\item{nperm}{The number of permutations to be used for the test.}
+}
+\value{
+A list with two objects of the class 'randtest' (see
+\code{\link[ade4]{as.randtest}}), the first one for 'global' structures
+(positivie autocorrelation) and the second for 'local' structures (negative
+autocorrelation).
+}
+\description{
+The function \code{spca_randtest} implements Monte-Carlo tests for the
+presence of significant spatial structures in a sPCA object. Two tests are
+run, for global (positive autocorrelation) and local (negative
+autocorrelation) structures, respectively. The test statistics used are the
+sum of the absolute values of the corresponding eigenvalues.
+}
+\examples{
+
+\dontrun{
+## Load data
+data(sim2pop)
+
+## Make spca
+spca1 <- spca(sim2pop, type = 1, scannf = FALSE, plot.nb = FALSE)
+
+spca1
+plot(spca1)
+
+## run tests (use more permutations in practice, e.g. 999)
+tests <- spca_randtest(spca1, nperm = 49)
+
+## check results
+tests
+plot(tests[[1]]) # global structures
+
+}
+
+}
+\author{
+Original code by Valeria Montano adapted by Thibaut Jombart.
+}
diff --git a/man/strata-methods.Rd b/man/strata-methods.Rd
index 190bbfb..11d7510 100644
--- a/man/strata-methods.Rd
+++ b/man/strata-methods.Rd
@@ -2,12 +2,12 @@
% Please edit documentation in R/strataMethods.R
\docType{methods}
\name{strata}
-\alias{addStrata}
-\alias{addStrata,genind-method}
-\alias{addStrata,genlight-method}
-\alias{addStrata<-}
-\alias{addStrata<-,genind-method}
-\alias{addStrata<-,genlight-method}
+\alias{strata}
+\alias{strata,genind-method}
+\alias{strata,genlight-method}
+\alias{strata<-}
+\alias{strata<-,genind-method}
+\alias{strata<-,genlight-method}
\alias{nameStrata}
\alias{nameStrata,genind-method}
\alias{nameStrata,genlight-method}
@@ -20,12 +20,12 @@
\alias{splitStrata<-}
\alias{splitStrata<-,genind-method}
\alias{splitStrata<-,genlight-method}
-\alias{strata}
-\alias{strata,genind-method}
-\alias{strata,genlight-method}
-\alias{strata<-}
-\alias{strata<-,genind-method}
-\alias{strata<-,genlight-method}
+\alias{addStrata}
+\alias{addStrata,genind-method}
+\alias{addStrata,genlight-method}
+\alias{addStrata<-}
+\alias{addStrata<-,genind-method}
+\alias{addStrata<-,genlight-method}
\title{Access and manipulate the population strata for genind or genlight objects.}
\usage{
strata(x, formula = NULL, combine = TRUE, value)
@@ -159,11 +159,10 @@ microbov # Now we have all of our strata named and split
head(strata(microbov)) # all strata are appropriately named and split.
}
}
-\author{
-Zhian N. Kamvar
-}
\seealso{
\code{\link{setPop}} \code{\link{genind}}
\code{\link{as.genind}}
}
-
+\author{
+Zhian N. Kamvar
+}
diff --git a/man/tab.Rd b/man/tab.Rd
index 2fcc40e..a4eb9be 100644
--- a/man/tab.Rd
+++ b/man/tab.Rd
@@ -44,4 +44,3 @@ head(tab(microbov,freq=TRUE))
}
-
diff --git a/man/web.Rd b/man/web.Rd
index 6186b7e..10774e6 100644
--- a/man/web.Rd
+++ b/man/web.Rd
@@ -1,9 +1,9 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/auxil.R
\name{adegenetWeb}
-\alias{adegenetIssues}
-\alias{adegenetTutorial}
\alias{adegenetWeb}
+\alias{adegenetTutorial}
+\alias{adegenetIssues}
\title{Functions to access online resources for adegenet}
\usage{
adegenetWeb()
@@ -38,4 +38,3 @@ Available tutorials are:
\item 'strata': introduction to hierarchical population structure in adegenet
}
}
-
diff --git a/man/xvalDapc.Rd b/man/xvalDapc.Rd
index bf23b11..2ab93dc 100644
--- a/man/xvalDapc.Rd
+++ b/man/xvalDapc.Rd
@@ -1,8 +1,11 @@
\encoding{UTF-8}
\name{DAPC cross-validation}
\alias{xvalDapc}
+\alias{xvalDapc.default}
\alias{xvalDapc.data.frame}
\alias{xvalDapc.matrix}
+\alias{xvalDapc.genlight}
+\alias{xvalDapc.genind}
\title{Cross-validation for Discriminant Analysis of Principal Components (DAPC)}
\description{
The function \code{xvalDapc} performs stratified cross-validation of DAPC
@@ -11,12 +14,13 @@
\code{data.frame} and \code{matrix}.\cr
}
\usage{
-xvalDapc(x, grp, n.pca.max = 300, n.da = NULL,
+xvalDapc(x, \dots)
+
+\method{xvalDapc}{default}(x, grp, n.pca.max = 300, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 30, xval.plot = TRUE, \dots)
-
\method{xvalDapc}{data.frame}(x, grp, n.pca.max = 300, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
@@ -26,6 +30,10 @@ xvalDapc(x, grp, n.pca.max = 300, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 30, xval.plot = TRUE, \dots)
+
+\method{xvalDapc}{genlight}(x, \dots)
+
+\method{xvalDapc}{genind}(x, \dots)
}
\arguments{
\item{x}{\code{a data.frame} or a \code{matrix} used as input of DAPC.}
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..8c97c0f
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,32 @@
+#include <stdlib.h> // for NULL
+#include <R_ext/Rdynload.h>
+
+/* FIXME:
+ Check these declarations against the C/Fortran source code.
+*/
+
+/* .C calls */
+extern void bytesToBinInt(void *, void *, void *);
+extern void bytesToInt(void *, void *, void *, void *, void *);
+extern void CheckAllSeg(void *, void *, void *, void *, void *, void *);
+extern void GLdotProd(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void GLsumFreq(void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void GLsumInt(void *, void *, void *, void *, void *, void *, void *, void *, void *);
+extern void nb_shared_all(void *, void *, void *, void *);
+
+static const R_CMethodDef CEntries[] = {
+ {"bytesToBinInt", (DL_FUNC) &bytesToBinInt, 3},
+ {"bytesToInt", (DL_FUNC) &bytesToInt, 5},
+ {"CheckAllSeg", (DL_FUNC) &CheckAllSeg, 6},
+ {"GLdotProd", (DL_FUNC) &GLdotProd, 12},
+ {"GLsumFreq", (DL_FUNC) &GLsumFreq, 9},
+ {"GLsumInt", (DL_FUNC) &GLsumInt, 9},
+ {"nb_shared_all", (DL_FUNC) &nb_shared_all, 4},
+ {NULL, NULL, 0}
+};
+
+void R_init_adegenet(DllInfo *dll)
+{
+ R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
+ R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/tests/testthat/test-findclust.R b/tests/testthat/test-findclust.R
new file mode 100644
index 0000000..be2c700
--- /dev/null
+++ b/tests/testthat/test-findclust.R
@@ -0,0 +1,27 @@
+context("find.clusters tests")
+
+test_that("find.clusters works with pre-defined clusters", {
+ skip_on_cran()
+ data(nancycats)
+
+ # set connection
+ f <- file()
+ options(adegenet.testcon = f)
+
+ # add data to connection
+ twos <- paste(rep(2, nPop(nancycats)), collapse = "\n")
+ write(twos, f)
+
+ # test function
+ expect_warning(capture.output(res <- find.clusters(nancycats, clust = pop(nancycats), n.pca = 100)))
+ # We expect each group to receive two clusters
+ expect_equal(length(levels(res$grp)), nPop(nancycats) * 2)
+ # We expect all individuals accounted for
+ expect_equal(length(res$grp), nInd(nancycats))
+
+ # reset variable
+ options(adegenet.testcon = stdin())
+ # close connection
+ close(f)
+})
+
diff --git a/tests/testthat/test_accessors.R b/tests/testthat/test_accessors.R
index 8bf08b2..1804496 100644
--- a/tests/testthat/test_accessors.R
+++ b/tests/testthat/test_accessors.R
@@ -147,3 +147,29 @@ test_that("tab will return frequencies for PA data", {
res <- tab(apop)/rowSums(tab(apop))
expect_equivalent(atab, res)
})
+
+test_that("subsettors give one warning for individuals", {
+ skip_on_cran()
+ expect_warning(microbov[c("bippity", "hop", "bop")], "the following specified individuals do not exist: bippity, hop, bop")
+ expect_warning(microbov[c("bippity", "hop", "bop")], "no individual selected - ignoring")
+})
+
+
+test_that("subsettors give one warning for loci", {
+ skip_on_cran()
+ expect_warning(microbov[loc = c("bippity", "hop", "bop")], "the following specified loci do not exist: bippity, hop, bop")
+ expect_warning(microbov[loc = c("bippity", "hop", "bop")], "no loci selected - ignoring")
+})
+
+test_that("subsettors give one warning for populations", {
+ skip_on_cran()
+ expect_warning(microbov[pop = c("bippity", "hop", "bop")], "the following specified populations do not exist: bippity, hop, bop")
+ expect_warning(microbov[pop = c("bippity", "hop", "bop")], "no populations selected - ignoring")
+})
+
+test_that("subsettors give one warning for genpop objects", {
+ skip_on_cran()
+ micropop <- genind2genpop(microbov, quiet = TRUE)
+ expect_warning(micropop[c("bippity", "hop", "bop")], "the following specified populations do not exist: bippity, hop, bop")
+ expect_warning(micropop[c("bippity", "hop", "bop")], "no population selected - ignoring")
+})
\ No newline at end of file
diff --git a/tests/testthat/test_compoplot.R b/tests/testthat/test_compoplot.R
new file mode 100644
index 0000000..0221e1b
--- /dev/null
+++ b/tests/testthat/test_compoplot.R
@@ -0,0 +1,28 @@
+context("Compoplot Tests")
+
+mat <- t(apply(matrix(sample(10, 30, replace = TRUE), ncol = 3), 1, function(x) x/sum(x)))
+
+test_that("compoplot.matrix works", {
+ skip_on_cran()
+ expect_silent(compoplot(mat))
+})
+
+test_that("compoplot works with a custom palette", {
+ skip_on_cran()
+ expect_silent(compoplot(mat, col.pal = c("cyan", "magenta", "yellow")))
+})
+
+test_that("compoplot works with a named custom palette", {
+ skip_on_cran()
+ expect_silent(compoplot(mat, col.pal = c(`1`="cyan", `2`="magenta", `3`="yellow")))
+})
+
+test_that("compoplot throws a warning if there are too many colors", {
+ skip_on_cran()
+ expect_warning(compoplot(mat, funky(4)), "populations fewer than")
+})
+
+test_that("compoplot throws a warning if there are not enough colors and does its own thing", {
+ skip_on_cran()
+ expect_warning(compoplot(mat, c("red", "blue")), "Using funky()")
+})
\ No newline at end of file
diff --git a/tests/testthat/test_genlight.R b/tests/testthat/test_genlight.R
index 0c2ae5b..b4c1e90 100644
--- a/tests/testthat/test_genlight.R
+++ b/tests/testthat/test_genlight.R
@@ -102,5 +102,5 @@ test_that("missing data is properly subset with a character vector", {
test_that("genlight objects do not take a mixture of positive and negative subscripts", {
skip_on_cran()
- expect_error(xx[, c(2, -1)], "subscripts.")
+ expect_error(xx[, c(2, -1)], ".* may be mixed with negative subscript")
})
diff --git a/tests/testthat/test_haploGen.R b/tests/testthat/test_haploGen.R
new file mode 100644
index 0000000..e977cac
--- /dev/null
+++ b/tests/testthat/test_haploGen.R
@@ -0,0 +1,5 @@
+context("haploGen tests")
+
+test_that("haploGen actually works", {
+ expect_is(haploGen(seq.length = 30, geo.sim = TRUE), "haploGen")
+})
\ No newline at end of file
diff --git a/tests/testthat/test_import.R b/tests/testthat/test_import.R
index 0519a29..0620e1a 100644
--- a/tests/testthat/test_import.R
+++ b/tests/testthat/test_import.R
@@ -2,13 +2,13 @@ context("Import Tests")
test_that("df2genind works with haploids", {
skip_on_cran()
- x <- matrix(sample(20), nrow = 10, ncol = 2)
+ x <- matrix(as.character(sample(20)), nrow = 10, ncol = 2)
res <- df2genind(x, ploidy = 1)
expect_that(sum(res at loc.n.all), equals(20))
expect_that(nInd(res), equals(10))
expect_that(nLoc(res), equals(2))
resdf <- genind2df(res)
- expect_that(as.matrix(resdf), is_equivalent_to(x))
+ all.equal(unlist(resdf, use.names=FALSE), as.vector(x))
})
test_that("df2genind makes sense for given example", {
@@ -29,7 +29,7 @@ test_that("df2genind makes sense for given example", {
test_that("df2genind handles NAs for 'numerically named' samples correctly", {
skip_on_cran()
-
+
df <- read.table(text = "
AnimalID,Samp,INRA21,AHT137,REN169D01,AHTk253
730,AX.0CCE,092 098,132 132,NA,284 286
@@ -37,15 +37,15 @@ AnimalID,Samp,INRA21,AHT137,REN169D01,AHTk253
677,AP.088P,092 096,140 146,204 204,280 280
678,AP.088T,096 098,124 148,198 204,280 280
544,AP.07XM,096 098,134 146,198 198,280 286
-533,AP.07UM,092 098,134 148,198 204,280 286",
+533,AP.07UM,092 098,134 148,198 204,280 286",
header = TRUE, sep = ",", colClasses = rep("factor", 6))
-
+
obj <- df2genind(X = df[, !grepl("AnimalID|Samp", colnames(df))], ind.names = df$AnimalID,
sep = " ", ncode = 6)
g <- tab(obj)
- expect_that(g["730", grepl("REN169D01", colnames(g))],
- is_equivalent_to(c(REN169D01.204 = as.integer(NA),
- REN169D01.208 = as.integer(NA),
+ expect_that(g["730", grepl("REN169D01", colnames(g))],
+ is_equivalent_to(c(REN169D01.204 = as.integer(NA),
+ REN169D01.208 = as.integer(NA),
REN169D01.198 = as.integer(NA)))
)
})
@@ -72,7 +72,7 @@ test_that("read.X functions work as expected", {
fsta <- read.fstat(system.file("files/nancycats.dat",package="adegenet"), quiet = TRUE)
gntx <- read.genetix(system.file("files/nancycats.gtx",package="adegenet"), quiet = TRUE)
stru <- read.structure(system.file("files/nancycats.str",package="adegenet"),
- onerowperind=FALSE, n.ind=237, n.loc=9, col.lab=1,
+ onerowperind=FALSE, n.ind=237, n.loc=9, col.lab=1,
col.pop=2, ask=FALSE, quiet = TRUE)
data("nancycats", package = "adegenet")
# Making sure that the populations are all named the same. The order of the
@@ -81,7 +81,7 @@ test_that("read.X functions work as expected", {
levels(pop(fsta)) <- levels(pop(nancycats))
levels(pop(gntx)) <- levels(pop(nancycats))
levels(pop(stru)) <- levels(pop(nancycats))
-
+
# Ensuring that the locus and population summaries are equivalent
summary_stats <- summary(nancycats)
expect_equivalent(summary(gpop), summary_stats)
@@ -93,39 +93,141 @@ test_that("read.X functions work as expected", {
test_that("read.genpop can import duplicate names", {
skip_on_cran()
x <- "
- Microsat on Chiracus radioactivus, a pest species
- Loc1, Loc2, Loc3, Y-linked, Loc4
-POP
-AA8, 0405 0711 0304 0000 0505
-AA9, 0405 0609 0208 0000 0505
-A10, 0205 0609 0101 0000 0305
-A11, 0405 0606 0102 0000 0504
-A12, 0202 0609 0105 0000 0507
-A13, 0505 0909 0107 0000 0505
-A14, 0202 0609 0207 0000 0503
-A15, 0405 0609 0101 0000 0505
+ Microsat on Chiracus radioactivus, a pest species
+ Loc1, Loc2, Loc3, Y-linked, Loc4
+POP
+AA8, 0405 0711 0304 0000 0505
+AA9, 0405 0609 0208 0000 0505
+A10, 0205 0609 0101 0000 0305
+A11, 0405 0606 0102 0000 0504
+A12, 0202 0609 0105 0000 0507
+A13, 0505 0909 0107 0000 0505
+A14, 0202 0609 0207 0000 0503
+A15, 0405 0609 0101 0000 0505
Pop
-AF, 0000 0000 0000 0000 0505
-AF, 0205 0307 0102 0000 0505
-AF, 0202 0609 0202 0000 0505
-AF, 0205 0909 0000 0000 0505
-AF, 0205 0307 0202 0000 0505
-AF, 0505 0303 0102 0000 0505
-AF, 0205 0700 0000 0000 0505
-AF, 0505 0900 0000 0000 0405
-AF, 0205 0600 0000 0000 0505
-AF, 0505 0606 0202 0000 0505
-pop
-C45, 0505 0606 0202 0000 0505
-C45, 0505 0909 0202 0000 0505
-C45, 0505 0306 0202 0000 0505
-C45, 0505 0909 0102 0000 0405
-C45, 0205 0303 0202 0000 0505
-C45, 0205 0909 0202 0000 0405
+AF, 0000 0000 0000 0000 0505
+AF, 0205 0307 0102 0000 0505
+AF, 0202 0609 0202 0000 0505
+AF, 0205 0909 0000 0000 0505
+AF, 0205 0307 0202 0000 0505
+AF, 0505 0303 0102 0000 0505
+AF, 0205 0700 0000 0000 0505
+AF, 0505 0900 0000 0000 0405
+AF, 0205 0600 0000 0000 0505
+AF, 0505 0606 0202 0000 0505
+pop
+C45, 0505 0606 0202 0000 0505
+C45, 0505 0909 0202 0000 0505
+C45, 0505 0306 0202 0000 0505
+C45, 0505 0909 0102 0000 0405
+C45, 0205 0303 0202 0000 0505
+C45, 0205 0909 0202 0000 0405
"
tmp <- tempfile(fileext = ".gen")
cat(x, file = tmp)
expect_warning(gp <- read.genepop(tmp))
expect_identical(indNames(gp), .genlab("", nInd(gp)))
+
+})
+
+
+
+test_that("df2genind can handle periods in input", {
+ skip_on_cran()
+ dat <-
+ data.frame(
+ pop = c(1, 1, 2, 2),
+ loc1 = c("1/1", "1/2", "1.1/2", "2/2"),
+ loc2 = c("1/1", "1/2", "1/2", "2/2")
+ )
+ expect_warning(datgi <- df2genind(dat[, -1], sep = "/", pop = dat[, 1]))
+ expect_equivalent(alleles(datgi)$loc1, c("1", "2", "1_1"))
+})
+
+test_that("df2genind can handle periods in input with underscore separator", {
+ skip_on_cran()
+ dat <-
+ data.frame(
+ pop = c(1, 1, 2, 2),
+ loc1 = c("1/1", "1/2", "1.1/2", "2/2"),
+ loc2 = c("1/1", "1/2", "1/2", "2/2")
+ )
+ dat <- apply(dat, 2, function(i) gsub("/", "_", i))
+ expect_warning(datgi <- df2genind(dat[, -1], sep = "_", pop = dat[, 1]))
+ expect_equivalent(alleles(datgi)$loc1, c("1", "2", "1p1"))
+})
+
+
+test_that("different imports sort populations in the same way", {
+ skip_on_cran()
+
+ ## read nancycats data from different formats
+ x.str <- read.structure(system.file("files/nancycats.str",package="adegenet"),
+ onerowperind=FALSE, n.ind=237, n.loc=9, col.lab=1, col.pop=2, ask=FALSE)
+ x.gen <- read.genepop(system.file("files/nancycats.gen",package="adegenet"))
+ x.dat <- read.fstat(system.file("files/nancycats.dat",package="adegenet"))
+ x.gtx <- read.genetix(system.file("files/nancycats.gtx",package="adegenet"))
+
+ ## check that the pop are identical:
+
+ ## we use 'table(pop(...))' because individuals may be sorted differently in the files, so
+ ## 'pop(...)' may be different
+
+ identical(table(pop(x.gen)), table(pop(x.str)))
+ identical(table(pop(x.gen)), table(pop(x.dat)))
+ identical(table(pop(x.gen)), table(pop(x.gtx)))
+})
+
+
+test_that("ensure importing structure files with numbers for locus names imports correectly", {
+ skip_on_cran()
+
+ # Column names should have no extra characters in front of them. Your IDE
+ # may be adding them, so watch out!
+ cat(print(" 1_25 8_54 1358_15 1363_12 1368_57 1369_41 1372_14 1373_9 1377_42 1378_53 1379_10 1382_37 1386_27 1398_46 1400_9 1401_25 1403_13 1404_17 1409_42 1416_48 1419_11 1421_14 1423_5 1424_74 1426_55 1429_46 1432_23 1435_30 1436_7 1438_9 1443_37
+A_KH1584 A 1 4 4 1 1 3 2 4 4 2 3 3 2 4 1 3 1 1 2 3 1 4 4 3 2 2 3 4 4 4 2
+A_KH1584 A 1 4 4 1 1 3 2 4 4 4 3 3 4 4 1 3 1 3 2 3 3 4 4 3 4 2 3 4 4 4 2
+C_KH1059 C 0 4 4 1 1 3 2 4 4 2 1 3 2 4 1 3 1 3 2 3 3 2 4 3 2 2 3 2 4 4 2
+C_KH1059 C 0 4 4 1 1 3 2 4 4 4 3 3 2 4 1 3 1 3 2 3 3 4 4 3 2 2 3 4 4 4 2
+M_KH1834 M 0 2 2 1 1 3 2 4 4 2 3 3 2 4 1 3 1 1 2 3 3 4 4 3 2 2 3 2 4 4 2
+M_KH1834 M 0 4 4 1 3 3 2 4 4 2 3 3 2 4 1 3 1 3 2 3 3 4 4 3 2 4 3 4 4 4 2
+M_KH1837 M 1 4 4 1 1 3 2 4 4 0 3 3 2 2 1 3 1 1 2 3 3 4 4 3 4 2 3 4 4 4 2
+M_KH1837 M 1 4 4 1 3 3 2 4 4 0 3 3 4 4 1 3 1 3 2 3 3 4 4 3 4 2 3 4 4 4 2"),
+ file = "elizabeth_starts_with_number.stru")
-})
\ No newline at end of file
+ xy1 <- read.structure("elizabeth_starts_with_number.stru", NA.char="0",
+ n.ind = 4, n.loc = 31, onerowperind = FALSE,
+ col.lab = 1, col.pop = 2, row.marknames = 1,
+ sep = "\t", col.others = 0)
+
+ unlink("elizabeth_starts_with_number.stru")
+
+ x1 <- tab(xy1)
+ # should return all 1, incorrect is NA
+ expect_true(all(x1[, grepl("1401_25", colnames(x1)), drop = FALSE] == 1))
+
+ # Column names should have no extra characters in front of them. Your IDE
+ # may be adding them, so watch out!
+ cat(print(" X1_25 X8_54 X1358_15 X1363_12 X1368_57 X1369_41 X1372_14 X1373_9 X1377_42 X1378_53 X1379_10 X1382_37 X1386_27 X1398_46 X1400_9 X1401_25 X1403_13 X1404_17 X1409_42 X1416_48 X1419_11 X1421_14 X1423_5 X1424_74 X1426_55 X1429_46 X1432_23 X1435_30 X1436_7 X1438_9 X1443_37
+A_KH1584 A 1 4 4 1 1 3 2 4 4 2 3 3 2 4 1 3 1 1 2 3 1 4 4 3 2 2 3 4 4 4 2
+A_KH1584 A 1 4 4 1 1 3 2 4 4 4 3 3 4 4 1 3 1 3 2 3 3 4 4 3 4 2 3 4 4 4 2
+C_KH1059 C 0 4 4 1 1 3 2 4 4 2 1 3 2 4 1 3 1 3 2 3 3 2 4 3 2 2 3 2 4 4 2
+C_KH1059 C 0 4 4 1 1 3 2 4 4 4 3 3 2 4 1 3 1 3 2 3 3 4 4 3 2 2 3 4 4 4 2
+M_KH1834 M 0 2 2 1 1 3 2 4 4 2 3 3 2 4 1 3 1 1 2 3 3 4 4 3 2 2 3 2 4 4 2
+M_KH1834 M 0 4 4 1 3 3 2 4 4 2 3 3 2 4 1 3 1 3 2 3 3 4 4 3 2 4 3 4 4 4 2
+M_KH1837 M 1 4 4 1 1 3 2 4 4 0 3 3 2 2 1 3 1 1 2 3 3 4 4 3 4 2 3 4 4 4 2
+M_KH1837 M 1 4 4 1 3 3 2 4 4 0 3 3 4 4 1 3 1 3 2 3 3 4 4 3 4 2 3 4 4 4 2"),
+ file = "elizabeth_starts_with_letter.stru")
+
+ xy2 <- read.structure("elizabeth_starts_with_letter.stru", NA.char="0",
+ n.ind = 4, n.loc = 31, onerowperind = FALSE,
+ col.lab = 1, col.pop = 2, row.marknames = 1,
+ sep = "\t", col.others = 0)
+
+ unlink("elizabeth_starts_with_letter.stru")
+ x2 <- tab(xy2)
+
+ # should return all 1
+ expect_true(all(x2[, grepl("1401_25", colnames(x2)), drop = FALSE] == 1))
+
+})
diff --git a/tests/testthat/test_snapclust.R b/tests/testthat/test_snapclust.R
new file mode 100644
index 0000000..6fb4bad
--- /dev/null
+++ b/tests/testthat/test_snapclust.R
@@ -0,0 +1,62 @@
+context("Test snapclust")
+
+
+test_that("snapclust gives decent results for F1 Zebu-Salers", {
+ skip_on_cran()
+
+
+ set.seed(1)
+
+ ## Simulate hybrids F1 Zebu/Salers
+ data(microbov)
+ zebu <- microbov[pop="Zebu"]
+ salers <- microbov[pop="Salers"]
+ hyb <- hybridize(zebu, salers, n=30)
+ x <- repool(zebu, salers, hyb)
+
+ ## run analysis
+ res.hyb <- snapclust(x, k=2, hybrids=TRUE)
+
+ ## check results
+ expect_true(res.hyb$converged)
+ expect_equal(unname(apply(table(pop(x), res.hyb$group),1,max)),
+ c(50,50,30)) # indiv from original pop all in one group
+
+})
+
+
+
+
+test_that("snapclust gives decent results for F1 & back-cross Zebu-Salers", {
+ skip_on_cran()
+
+ set.seed(1)
+
+ ## Simulate hybrids F1 Zebu/Salers
+ data(microbov)
+ zebu <- microbov[pop="Zebu"]
+ salers <- microbov[pop="Salers"]
+ hyb <- hybridize(zebu, salers, n=30)
+ x <- repool(zebu, salers, hyb)
+
+
+ ## Simulate hybrids backcross (F1 / parental)
+ f1.zebu <- hybridize(hyb, zebu, 20, pop = "f1.zebu")
+ f1.salers <- hybridize(hyb, salers, 25, pop = "f1.salers")
+ y <- repool(x, f1.zebu, f1.salers)
+
+
+ ## method with back-cross
+ res.back <- snapclust(y, k=2, hybrids = TRUE, hybrid.coef = c(.25,.5))
+ tab <- table(pop(y), res.back$group)
+
+ ## check results
+ expect_true(res.back$converged)
+ expect_true(tab[1,1] > 47)
+ expect_true(tab[2,2] > 47)
+ expect_true(tab[3,4] > 25)
+ expect_true(tab[4,3] > 10)
+ expect_true(tab[5,5] > 10)
+
+})
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-adegenet.git
More information about the debian-med-commit
mailing list