[med-svn] [libbpp-popgen] 01/09: Imported Upstream version 2.1.0
Andreas Tille
tille at debian.org
Wed Apr 13 15:21:25 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libbpp-popgen.
commit 3b8b9588ef6313639e8e6800f31d9a3f88236e12
Author: Andreas Tille <tille at debian.org>
Date: Wed Apr 13 17:12:31 2016 +0200
Imported Upstream version 2.1.0
---
AUTHORS.txt | 19 +
CMakeLists.txt | 177 ++
COPYING.txt | 505 ++++++
ChangeLog | 110 ++
Doxyfile | 1889 ++++++++++++++++++++
INSTALL.txt | 12 +
bpp-popgen.spec | 202 +++
debian/changelog | 55 +
debian/compat | 1 +
debian/control | 25 +
debian/copyright | 66 +
debian/docs | 0
debian/libbpp-popgen-dev.install | 3 +
debian/libbpp-popgen6.install | 1 +
debian/postinst | 43 +
debian/postrm | 45 +
debian/prerm | 27 +
debian/rules | 119 ++
debian/source/format | 1 +
src/Bpp/PopGen/AbstractIDataSet.cpp | 71 +
src/Bpp/PopGen/AbstractIDataSet.h | 80 +
src/Bpp/PopGen/AbstractODataSet.cpp | 58 +
src/Bpp/PopGen/AbstractODataSet.h | 73 +
src/Bpp/PopGen/AlleleInfo.h | 93 +
src/Bpp/PopGen/AnalyzedLoci.cpp | 219 +++
src/Bpp/PopGen/AnalyzedLoci.h | 170 ++
src/Bpp/PopGen/AnalyzedSequences.cpp | 128 ++
src/Bpp/PopGen/AnalyzedSequences.h | 119 ++
src/Bpp/PopGen/BasicAlleleInfo.cpp | 81 +
src/Bpp/PopGen/BasicAlleleInfo.h | 117 ++
src/Bpp/PopGen/BiAlleleMonolocusGenotype.cpp | 119 ++
src/Bpp/PopGen/BiAlleleMonolocusGenotype.h | 133 ++
src/Bpp/PopGen/DarwinDon.cpp | 82 +
src/Bpp/PopGen/DarwinDon.h | 96 +
src/Bpp/PopGen/DarwinVarSingle.cpp | 114 ++
src/Bpp/PopGen/DarwinVarSingle.h | 99 +
src/Bpp/PopGen/DataSet.cpp | 1362 ++++++++++++++
src/Bpp/PopGen/DataSet.h | 695 +++++++
src/Bpp/PopGen/DataSetTools.cpp | 90 +
src/Bpp/PopGen/DataSetTools.h | 80 +
src/Bpp/PopGen/Date.cpp | 138 ++
src/Bpp/PopGen/Date.h | 196 ++
src/Bpp/PopGen/GeneMapperCsvExport.cpp | 235 +++
src/Bpp/PopGen/GeneMapperCsvExport.h | 213 +++
src/Bpp/PopGen/Genepop.cpp | 208 +++
src/Bpp/PopGen/Genepop.h | 99 +
src/Bpp/PopGen/GeneralExceptions.cpp | 220 +++
src/Bpp/PopGen/GeneralExceptions.h | 299 ++++
src/Bpp/PopGen/Genetix.cpp | 135 ++
src/Bpp/PopGen/Genetix.h | 100 ++
src/Bpp/PopGen/Group.cpp | 626 +++++++
src/Bpp/PopGen/Group.h | 506 ++++++
src/Bpp/PopGen/IDataSet.h | 85 +
src/Bpp/PopGen/IODataSet.h | 75 +
src/Bpp/PopGen/Individual.cpp | 632 +++++++
src/Bpp/PopGen/Individual.h | 472 +++++
src/Bpp/PopGen/Locality.h | 130 ++
src/Bpp/PopGen/LocusInfo.cpp | 135 ++
src/Bpp/PopGen/LocusInfo.h | 155 ++
src/Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp | 86 +
src/Bpp/PopGen/MonoAlleleMonolocusGenotype.h | 113 ++
src/Bpp/PopGen/MonolocusGenotype.h | 86 +
src/Bpp/PopGen/MonolocusGenotypeTools.cpp | 61 +
src/Bpp/PopGen/MonolocusGenotypeTools.h | 80 +
src/Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp | 106 ++
src/Bpp/PopGen/MultiAlleleMonolocusGenotype.h | 121 ++
src/Bpp/PopGen/MultiSeqIndividual.cpp | 499 ++++++
src/Bpp/PopGen/MultiSeqIndividual.h | 369 ++++
src/Bpp/PopGen/MultilocusGenotype.cpp | 203 +++
src/Bpp/PopGen/MultilocusGenotype.h | 159 ++
src/Bpp/PopGen/MultilocusGenotypeStatistics.cpp | 816 +++++++++
src/Bpp/PopGen/MultilocusGenotypeStatistics.h | 294 +++
src/Bpp/PopGen/ODataSet.h | 75 +
src/Bpp/PopGen/PolymorphismMultiGContainer.cpp | 323 ++++
src/Bpp/PopGen/PolymorphismMultiGContainer.h | 206 +++
.../PopGen/PolymorphismMultiGContainerTools.cpp | 380 ++++
src/Bpp/PopGen/PolymorphismMultiGContainerTools.h | 123 ++
src/Bpp/PopGen/PolymorphismSequenceContainer.cpp | 447 +++++
src/Bpp/PopGen/PolymorphismSequenceContainer.h | 342 ++++
.../PopGen/PolymorphismSequenceContainerTools.cpp | 582 ++++++
.../PopGen/PolymorphismSequenceContainerTools.h | 255 +++
src/Bpp/PopGen/PopgenlibIO.cpp | 713 ++++++++
src/Bpp/PopGen/PopgenlibIO.h | 190 ++
src/Bpp/PopGen/SequenceStatistics.cpp | 1763 ++++++++++++++++++
src/Bpp/PopGen/SequenceStatistics.h | 1242 +++++++++++++
src/CMakeLists.txt | 104 ++
86 files changed, 21776 insertions(+)
diff --git a/AUTHORS.txt b/AUTHORS.txt
new file mode 100644
index 0000000..647559b
--- /dev/null
+++ b/AUTHORS.txt
@@ -0,0 +1,19 @@
+Eric Bazin
+Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+Sylvain Glémin <glemin at univ-montp2.fr>
+Khalid Belkhir <belkhir at univ-montp2.fr>
+
+Contributed code to Bio++ was enabled thanks to the following institutions and resources:
+
+2002 - 2006 Laboratoire GPIA - UMR CNRS 5171 Université Montpellier 2 (Eric Bazin, Khalid Belkhir, Guillaume Deuchst, Julien Dutheil, Sylvain Gaillard, Nicolas Galtier, Sylvain Glémin)
+2005 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Vincent Ranwez, Céline Scornavacca)
+2006 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Khalid Belkhir, Nicolas Galtier, Sylvain Glémin)
+2006 - 2007 ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil)
+2007 - 2010 Bioinformatics Research Center, University of Aarhus (Julien Dutheil).
+ Funded by European research Area on Plant Genomics (ERA-PG) ARelatives.
+2010 - ISE-M UMR CNRS 5554 Université Montpellier 2 (Julien Dutheil)
+2007 - Genetics and Horticulture UMR INRA 1259 Angers-Nantes INRA Center (Sylvain Gaillard)
+2008 - 2009 Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau)
+2009 - 2010 Berkeley University (Bastien Boussau)
+2010 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Bastien Boussau)
+2008 - Laboratoire BBE - UMR CNRS 5558 Université Lyon 1 (Laurent Guéguen)
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..b41c336
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,177 @@
+# CMake script for Bio++ PopGenLib
+# Author: Sylvain Gaillard and Julien Dutheil
+# Created: 21/08/2009
+
+# Global parameters
+cmake_minimum_required(VERSION 2.6)
+project(bpp-popgen CXX)
+# Fall back to RelWithDebInfo when no build type was requested.
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
+ "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
+endif()
+
+# Append (do not overwrite) so flags supplied via CXXFLAGS or -DCMAKE_CXX_FLAGS are kept.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weffc++ -Wshadow -Wconversion")
+if(NOT NO_VIRTUAL_COV)
+ set(NO_VIRTUAL_COV FALSE CACHE BOOL
+ "Disable covariant return type with virtual inheritance, for compilers that do not support it." FORCE)
+endif()
+
+if(NO_VIRTUAL_COV)
+ message("-- Covariant return with virtual inheritance disabled.")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_VIRTUAL_COV=1")
+endif()
+
+# When NO_DEP_CHECK is unset or false, record it as a FALSE cache entry.
+if(NOT NO_DEP_CHECK)
+ set(NO_DEP_CHECK FALSE CACHE BOOL
+ "Disable dependencies check for building distribution only." FORCE)
+endif()
+
+IF(NO_DEP_CHECK)
+ MESSAGE("-- Dependencies checking disabled. Only distribution can be built.")
+ELSE(NO_DEP_CHECK)
+
+# Libtool-like version number
+# CURRENT:REVISION:AGE => file.so.(C-A).A.R
+# current: The most recent interface number that this library implements.
+# revision: The implementation number of the current interface.
+# age: The difference between the newest and oldest interfaces that this
+# library implements.
+# In other words, the library implements all the interface numbers in the
+# range from number current - age to current.
+set(BPPPOPGEN_VERSION_CURRENT "6")
+set(BPPPOPGEN_VERSION_REVISION "3")
+set(BPPPOPGEN_VERSION_AGE "0")
+
+# Derive MAJOR.MINOR.PATCH as (CURRENT - AGE).AGE.REVISION
+math(EXPR BPPPOPGEN_VERSION_MAJOR "${BPPPOPGEN_VERSION_CURRENT} - ${BPPPOPGEN_VERSION_AGE}")
+set(BPPPOPGEN_VERSION_MINOR ${BPPPOPGEN_VERSION_AGE})
+set(BPPPOPGEN_VERSION_PATCH ${BPPPOPGEN_VERSION_REVISION})
+set(BPPPOPGEN_VERSION "${BPPPOPGEN_VERSION_MAJOR}.${BPPPOPGEN_VERSION_MINOR}.${BPPPOPGEN_VERSION_PATCH}")
+
+# Put the install prefix at the front of CMAKE_PREFIX_PATH so that the find
+# commands below also look in a non-standard install location.
+if(CMAKE_INSTALL_PREFIX)
+ set(CMAKE_PREFIX_PATH "${CMAKE_INSTALL_PREFIX}" ${CMAKE_PREFIX_PATH})
+endif()
+
+# Macro: find library <lib_name> and header <include_to_find>; abort if either is missing.
+macro(IMPROVED_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find)
+ # Results are stored in <lib_name>_INCLUDE_DIR and <lib_name>_LIBRARY.
+ find_path(${lib_name}_INCLUDE_DIR ${include_to_find})
+ set(${lib_name}_NAMES ${lib_name} ${lib_name}lib ${lib_name}dll)
+ find_library(${lib_name}_LIBRARY NAMES ${${lib_name}_NAMES} PATH_SUFFIXES lib${LIB_SUFFIX})
+
+ if(${lib_name}_LIBRARY AND ${lib_name}_INCLUDE_DIR)
+ message("-- Library ${lib_name} found here:")
+ message(" includes : ${${lib_name}_INCLUDE_DIR}")
+ message(" libraries: ${${lib_name}_LIBRARY}")
+ else()
+ message(FATAL_ERROR "${lib_name} required but not found.")
+ endif()
+
+ # Register the include directory and append the library to the list named by OUTPUT_LIBS.
+ include_directories(${${lib_name}_INCLUDE_DIR})
+ set(${OUTPUT_LIBS} ${${OUTPUT_LIBS}} ${${lib_name}_LIBRARY})
+endmacro(IMPROVED_FIND_LIBRARY)
+
+# Required Bio++ dependencies; their library paths are collected in LIBS.
+improved_find_library(LIBS bpp-seq Bpp/Seq/Alphabet/Alphabet.h)
+improved_find_library(LIBS bpp-core Bpp/Clonable.h)
+
+# Process the CMakeLists.txt of the src subdirectory.
+add_subdirectory(src)
+
+# API documentation targets, defined only when Doxygen is available.
+find_package(Doxygen)
+if(DOXYGEN_FOUND)
+ add_custom_target(apidoc cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-build
+ COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-build
+ COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-build
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
+ add_custom_target(apidoc-stable cp Doxyfile ${CMAKE_BINARY_DIR}/Doxyfile-stable
+ COMMAND echo "OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
+ COMMAND echo "HTML_HEADER=header.html" >> ${CMAKE_BINARY_DIR}/Doxyfile-stable
+ COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile-stable
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
+endif()
+
+ENDIF(NO_DEP_CHECK)
+
+# CPack configuration (package metadata and source-tarball exclusions)
+set(CPACK_PACKAGE_NAME "libbpp-popgen")
+set(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
+set(CPACK_PACKAGE_VERSION "2.1.0")
+set(CPACK_PACKAGE_VERSION_MAJOR "2")
+set(CPACK_PACKAGE_VERSION_MINOR "1")
+set(CPACK_PACKAGE_VERSION_PATCH "0")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Population Genetics library")
+set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
+set(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS.txt")
+set(CPACK_RESOURCE_FILE_INSTALL "${CMAKE_SOURCE_DIR}/INSTALL.txt")
+set(CPACK_SOURCE_GENERATOR "TGZ")
+set(CPACK_SOURCE_IGNORE_FILES
+ "CMakeFiles"
+ "Makefile"
+ "_CPack_Packages"
+ "CMakeCache.txt"
+ ".*\\\\.cmake"
+ ".*\\\\.git"
+ ".*\\\\.gz"
+ ".*\\\\.deb"
+ ".*\\\\.rpm"
+ ".*\\\\.dmg"
+ ".*\\\\.sh"
+ ".*\\\\..*\\\\.swp"
+ "src/\\\\..*"
+ "src/libbpp*"
+ "debian/tmp"
+ "debian/libbpp.*/"
+ "debian/libbpp.*\\\\.so.*"
+ "debian/libbpp.*\\\\.a"
+ "debian/libbpp.*\\\\.substvars"
+ "debian/libbpp.*\\\\.debhelper"
+ "debian/debhelper\\\\.log"
+ "html"
+ "PopGen.tag"
+ "Testing"
+ "build-stamp"
+ "install_manifest.txt"
+ "DartConfiguration.tcl"
+ ${CPACK_SOURCE_IGNORE_FILES}
+)
+if(MACOS) # NOTE(review): CMake itself defines APPLE, not MACOS - confirm MACOS is set elsewhere
+ set(CPACK_GENERATOR "Bundle")
+endif()
+
+set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
+set(CPACK_DEBSOURCE_PACKAGE_FILE_NAME "lib${CMAKE_PROJECT_NAME}_${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.orig")
+include(CPack)
+
+# The 'dist' target runs the build tool on CPack's package_source target.
+add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
+# 'clean' is not (yet) a first class target. However, we need to clean the directories before building the sources:
+if("${CMAKE_GENERATOR}" MATCHES "Make")
+ add_custom_target(make_clean
+ COMMAND ${CMAKE_MAKE_PROGRAM} clean
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ )
+ add_dependencies(dist make_clean)
+endif()
+
+if(NOT NO_DEP_CHECK)
+if(UNIX)
+# Debian packaging: copy the source tarball under its .orig name, then run dpkg-buildpackage.
+add_custom_target(origdist COMMAND cp ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz ../${CPACK_DEBSOURCE_PACKAGE_FILE_NAME}.tar.gz)
+add_dependencies(origdist dist)
+add_custom_target(deb dpkg-buildpackage -uc -us -i${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
+add_dependencies(deb origdist)
+
+# RPM packaging: run rpmbuild on the source tarball.
+add_custom_target(rpm rpmbuild -ta ${CPACK_SOURCE_PACKAGE_FILE_NAME}.tar.gz)
+add_dependencies(rpm dist)
+
+endif()
+endif()
diff --git a/COPYING.txt b/COPYING.txt
new file mode 100644
index 0000000..7e84b53
--- /dev/null
+++ b/COPYING.txt
@@ -0,0 +1,505 @@
+
+ CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[logiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial establishment, having its principal place of business at
+31-33 rue de la Fédération, 75752 Paris cedex 15, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+Software's suitability as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting Software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the Modified Software become subject
+to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full Source Code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the Source Code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by the current license, for the duration set forth in article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+A Licensee who develops a Contribution is the owner of the intellectual
+property rights over this Contribution as defined by applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+A Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the product's suitability for its requirements, its good working order,
+and for ensuring that it shall not cause damage to either persons or
+properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any Failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2005-05-21.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..cc4bc56
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,110 @@
+07/03/13 -*- Version 2.1.0 -*-
+
+06/05/13 Julien Dutheil
+* Fixed bug #67
+* Fixed bug #8
+
+09/02/12 -*- Version 2.0.3 -*-
+
+09/06/11 -*- Version 2.0.2 -*-
+
+08/06/11 Benoît Nabholz
+* Added Fst calculation from Hudson, Slatkin and Maddison 1992 (Genetics 132:153).
+
+28/02/11 -*- Version 2.0.1 -*-
+
+07/02/11 -*- Version 2.0.0 -*-
+
+21/07/08 -*- Version 1.4.0 -*-
+
+18/11/09 Sylvain Gaillard
+* Switching from pointer to reference in many methods arguments and return
+values.
+* Introducing std::auto_ptr when methods need to return pointer to new
+object.
+
+16/11/09 Sylvain Gaillard
+* No more "using namespace" in header files.
+
+21/07/09 Sylvain Gaillard
+* Removed Coord and CoordsTools classes (move to Utils/Point2D and
+Utils/Point2DTools)
+* Code update to use Point2D class instead of Coord class
+
+23/06/09 Julien Dutheil
+* Code update for compatibility with seqlib. Changed pointers to refs when
+retrieving sequences.
+
+19/06/09 Sylvain Gaillard
+* Removed all 'using namespace' statements from header files
+
+18/06/09 Sylvain Gaillard
+* Fix SequenceTools::getFrequencies call in SequenceStatistics
+* Fix SequenceStatistics::getTransitionsTransversionsRatio
+
+21/07/08 -*- Version 1.4.0 -*-
+
+15/04/09 Sylvain Gaillard
+* Moved to UTF8 encoding
+* Fixed computations in Fu & Li D statistics
+
+08/04/09 Sylvain Gaillard
+* Fixed calls to Seq::SymbolListTools::getCounts
+
+31/03/09 Sylvain Gaillard
+* Fixed Doxygen documentation @author tags
+* Fixed warnings in bpp::DarwinDon::write bpp::DarwinVarSingle::write and
+ bpp::MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey
+
+19/01/09 Sylvain Gaillard
+* Fixed bug in PolymorphismSequenceContainerTools::getSelectedSequences now
+ set the sequence count properly.
+
+16/01/09 Sylvain Gaillard
+* Fixed method PolymorphismSequenceContainerTools::extractGroup which now
+ returns the extracted group and not all groups but the extracted one.
+
+08/01/09 Sylvain Gaillard
+* New method sample in PolymorphismSequenceContainerTools.
+* Extend main page doxygen documentation.
+* A bit of cleaning.
+
+07/01/09 Céline Scornavacca & Julien Dutheil
+* Bug fixed in clone method of PolymorphismSequenceContainer.
+
+21/07/08 -*- Version 1.3.1 -*-
+
+10/11/08 Sylvain Gaillard
+* Fixed a bug in DataSet::getGroupName().
+
+21/07/08 -*- Version 1.3.0 -*-
+
+09/04/08 Sylvain Gaillard
+* Added DarwinDon and DarwinVarSingle output class.
+
+04/04/08 Sylvain Gaillard
+* Added MultiAlleleMonolocusGenotype and MonolocusGenotypeTools class.
+* Added GeneMapperCsvExport input class.
+
+01/04/08 Sylvain Gaillard
+* Fixed bug in SequenceStatistics::DVH (thanks to Alicia).
+
+18/01/08 -*- Version 1.2.0 -*-
+
+12/01/08 Julien Dutheil
+* Compatibility update: inclusion in namespace bpp + code update.
+
+06/07/07 -*- Version 1.1.1 -*-
+
+15/05/07 Julien Dutheil
+* Compatibility update (NumCalc).
+* PopGenLib does not depend anymore on PhylLib.
+* Coord and Locality classes code improved.
+
+28/08/06 -*- Version 1.1.0 -*-
+
+21/06/06 Khalid Belkhir
+* Nouvelles fonctionnalités pour les permutations.
+* Ajout Fstat multilocus.
+* Construction de matrices de distances.
+* Correction de bugs
diff --git a/Doxyfile b/Doxyfile
new file mode 100644
index 0000000..c87d577
--- /dev/null
+++ b/Doxyfile
@@ -0,0 +1,1889 @@
+# Doxyfile 1.8.3.1-20130209
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME = bpp-popgen
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = 2.1.0
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip. Note that you specify absolute paths here, but also
+# relative paths, which will be relative from the directory where doxygen is
+# started.
+
+STRIP_FROM_PATH = ./src/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH = ./src/
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 2
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding
+# "class=itcl::class" will allow you to use the command class in the
+# itcl::class meaning.
+
+TCL_SUBST =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension,
+# and language is one of the parsers supported by doxygen: IDL, Java,
+# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C,
+# C++. For instance to make doxygen treat .inc files as Fortran files (default
+# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
+# that for custom extensions you also need to set FILE_PATTERNS otherwise the
+# files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
+# comments according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you
+# can mix doxygen, HTML, and XML commands with Markdown formatting.
+# Disable only in case of backward compatibility issues.
+
+MARKDOWN_SUPPORT = YES
+
+# When enabled doxygen tries to link words that correspond to documented classes,
+# or namespaces to their corresponding documentation. Such a link can be
+# prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+
+AUTOLINK_SUPPORT = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); vs.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = YES
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES (the
+# default) will make doxygen replace the get and set methods by a property in
+# the documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
+# unions with only public data fields or simple typedef fields will be shown
+# inline in the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO (the default), structs, classes, and unions are shown on a separate
+# page (for HTML and Man pages) or section (for LaTeX and RTF).
+
+INLINE_SIMPLE_STRUCTS = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+SYMBOL_CACHE_SIZE = 0
+
+# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
+# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
+# their name and scope. Since this can be an expensive process and often the
+# same symbol appears multiple times in the code, doxygen keeps a cache of
+# pre-resolved symbols. If the cache is too small doxygen will become slower.
+# If the cache is too large, memory is wasted. The cache size is given by this
+# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+LOOKUP_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+
+EXTRACT_PACKAGE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = NO
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = NO
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if section-label ... \endif
+# and \cond section-label ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page. This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path. Do not use
+# file names with spaces, bibtex cannot handle them.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = src
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS = *.h \
+ *.cpp
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page (index.html).
+# This can be useful if you have a project on, for instance, GitHub and want to
+# reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C, C++ and Fortran comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code. Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+# for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If left blank doxygen will
+# generate a default style sheet. Note that it is recommended to use
+# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
+# tag will in the future become obsolete.
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
+# user-defined cascading style sheet that is included after the standard
+# style sheets created by doxygen. Using this option one can overrule
+# certain style aspects. This is preferred over using HTML_STYLESHEET
+# since it does not replace the standard style sheet and is therefore more
+# robust against future updates. Doxygen will copy the style sheet file to
+# the output directory.
+
+HTML_EXTRA_STYLESHEET =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the style sheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+
+HTML_DYNAMIC_SECTIONS = YES
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
+# entries shown in the various tree structured indices initially; the user
+# can expand and collapse entries dynamically later on. Doxygen will expand
+# the tree to such a level that at most the specified number of entries are
+# visible (unless a fully collapsed tree already exceeds this amount).
+# So setting the number of entries to 1 will produce a fully collapsed tree by
+# default. 0 is a special value representing an infinite number of entries
+# and will result in a fully expanded tree by default.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET = YES
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Bio++ Population Genetics Library"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = bpp.popgen
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely
+# identify the documentation publisher. This should be a reverse domain-name
+# style string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
+# at top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it. Since the tabs have the same information as the
+# navigation tree you can set this option to YES if you already set
+# GENERATE_TREEVIEW to YES.
+
+DISABLE_INDEX = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
+# could consider setting DISABLE_INDEX to YES when enabling this option.
+
+GENERATE_TREEVIEW = YES
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want the formulas to look prettier in the HTML
+# output. When enabled you may also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and
+# SVG. The default value is HTML-CSS, which is slower, but has the best
+# compatibility.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to
+# the MathJax Content Delivery Network so you can quickly see the result without
+# installing MathJax. However, it is strongly recommended to install a local
+# copy of MathJax from http://www.mathjax.org before deployment.
+
+MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
+# names that should be enabled during MathJax rendering.
+
+MATHJAX_EXTENSIONS =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript.
+# There are two flavours of web server based search depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools.
+# See the manual for details.
+
+SERVER_BASED_SEARCH = NO
+
+# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain
+# the search results. Doxygen ships with an example indexer (doxyindexer) and
+# search engine (doxysearch.cgi) which are based on the open source search engine
+# library Xapian. See the manual for configuration details.
+
+EXTERNAL_SEARCH = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+# Doxygen ships with an example search engine (doxysearch) which is based on
+# the open source search engine library Xapian. See the manual for configuration
+# details.
+
+SEARCHENGINE_URL =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+
+SEARCHDATA_FILE = searchdata.xml
+
+# When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+
+EXTERNAL_SEARCH_ID =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found.
+# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ...
+
+EXTRA_SEARCH_MAPPINGS =
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES = amsmath
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images
+# or other source files which should be copied to the LaTeX output directory.
+# Note that the files will be copied as-is; there are no commands or markers
+# available.
+
+LATEX_EXTRA_FILES =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
+# http://en.wikipedia.org/wiki/BibTeX for more info.
+
+LATEX_BIB_STYLE = plain
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load style sheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files
+# that can be used to generate PDF.
+
+GENERATE_DOCBOOK = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it. If left blank docbook will be used as the default path.
+
+DOCBOOK_OUTPUT = docbook
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. For each
+# tag file the location of the external documentation should be added. The
+# format of a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths
+# or URLs. Note that each tag file must have a unique name (where the name does
+# NOT include the path). If a tag file is not located in the directory in which
+# doxygen is run, you must also specify the path to the tagfile here.
+
+TAGFILES = ../bpp-core/BppCore.tag=../../bpp-core/html \
+ ../bpp-seq/BppSeq.tag=../../bpp-seq/html
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE = BppPopGen.tag
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed
+# in the related pages index. If set to NO, only the current project's
+# pages will be listed.
+
+EXTERNAL_PAGES = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS = 0
+
+# By default doxygen will use the Helvetica font for all dot files that
+# doxygen generates. When you want a differently looking font you can specify
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find
+# the font, which can be done by putting it in a standard location or by setting
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font.
+
+DOT_FONTNAME = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the Helvetica font.
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
+# set the path where dot can find it.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside
+# the class node. If there are many fields or methods and many nodes the
+# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
+# threshold limits the number of items for each type to make the size more
+# manageable. Set this to 0 for no limit. Note that the threshold may be
+# exceeded by 50% before the limit is enforced.
+
+UML_LIMIT_NUM_FIELDS = 10
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used. If you choose svg you need to set
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible in IE 9+ (other browsers do not have this requirement).
+
+DOT_IMAGE_FORMAT = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+# Note that this requires a modern browser other than Internet Explorer.
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible. Older versions of IE do not have SVG support.
+
+INTERACTIVE_SVG = NO
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = YES
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
diff --git a/INSTALL.txt b/INSTALL.txt
new file mode 100644
index 0000000..c8859da
--- /dev/null
+++ b/INSTALL.txt
@@ -0,0 +1,12 @@
+This software needs cmake >= 2.6 to build.
+
+After installing cmake, run it with the following command:
+cmake -DCMAKE_INSTALL_PREFIX=[where to install, for instance /usr/local or $HOME/.local] .
+
+If available, you can also use ccmake instead of cmake for a more user-friendly interface.
+
+Then compile and install the software with
+make install
+
+You may also consider installing and using the software checkinstall for easier system administration.
+
diff --git a/bpp-popgen.spec b/bpp-popgen.spec
new file mode 100644
index 0000000..c36d2f2
--- /dev/null
+++ b/bpp-popgen.spec
@@ -0,0 +1,202 @@
+%define _basename bpp-popgen
+%define _version 2.1.0
+%define _release 1
+%define _prefix /usr
+
+URL: http://biopp.univ-montp2.fr/
+
+Name: %{_basename}
+Version: %{_version}
+Release: %{_release}
+License: CECILL-2.0
+Vendor: The Bio++ Project
+Source: http://biopp.univ-montp2.fr/repos/sources/%{_basename}-%{_version}.tar.gz
+Summary: Bio++ Population Genetics library
+Group: Development/Libraries/C and C++
+Requires: bpp-core = %{_version}
+Requires: bpp-seq = %{_version}
+
+BuildRoot: %{_builddir}/%{_basename}-root
+BuildRequires: cmake >= 2.6.0
+BuildRequires: gcc-c++ >= 4.0.0
+BuildRequires: libbpp-core2 = %{_version}
+BuildRequires: libbpp-core-devel = %{_version}
+BuildRequires: libbpp-seq9 = %{_version}
+BuildRequires: libbpp-seq-devel = %{_version}
+
+AutoReq: yes
+AutoProv: yes
+
+%description
+This library contains utilities and classes for population genetics analysis.
+It is part of the Bio++ project.
+
+%package -n libbpp-popgen6
+Summary: Bio++ Population Genetics library
+Group: Development/Libraries/C and C++
+
+%description -n libbpp-popgen6
+This library contains utilities and classes for population genetics and molecular evolution analysis.
+It is part of the Bio++ project.
+
+%package -n libbpp-popgen-devel
+Summary: Libraries, includes to develop applications with %{_basename}
+Group: Development/Libraries/C and C++
+Requires: libbpp-popgen6 = %{_version}
+Requires: libbpp-seq9 = %{_version}
+Requires: libbpp-seq-devel = %{_version}
+Requires: libbpp-core2 = %{_version}
+Requires: libbpp-core-devel = %{_version}
+
+%description -n libbpp-popgen-devel
+The libbpp-popgen-devel package contains the header files and static libraries for
+building applications which use %{_basename}.
+
+%prep
+%setup -q
+
+%build
+export CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" # cmake reads these from its environment
+CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=%{_prefix} -DBUILD_TESTING=OFF"
+if [ "%{_lib}" = 'lib64' ] ; then # '=' rather than '==': this section runs under /bin/sh
+  CMAKE_FLAGS="$CMAKE_FLAGS -DLIB_SUFFIX=64"
+fi
+cmake $CMAKE_FLAGS . # left unquoted on purpose: must split into separate -D options
+make
+
+%install
+make DESTDIR=$RPM_BUILD_ROOT install
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post -n libbpp-popgen6 -p /sbin/ldconfig
+
+%post -n libbpp-popgen-devel
+createGeneric() {
+  # Write "$1.all", a header that #includes every *.h and *.all file found
+  # directly inside directory $1; subdirectories are processed first so that
+  # their own .all files already exist when this level is listed.
+  # All expansions are quoted so paths with blanks are not word-split.
+  local dir file        # keep recursion state out of the caller's scope
+  local -a dirs=()
+  echo "-- Creating generic include file: $1.all"
+  #Make sure we run into subdirectories first:
+  for file in "$1"/*
+  do
+    if [ -d "$file" ]; then
+      dirs+=( "$file" )
+    fi
+  done
+  for dir in "${dirs[@]}"
+  do
+    # Recursion:
+    createGeneric "$dir"
+  done
+  #Now list all files, including newly created .all files:
+  if [ -f "$1.all" ]; then rm "$1.all"; fi
+  dir=`basename "$1"`
+  for file in "$1"/*
+  do
+    if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ); then
+      file=`basename "$file"`
+      echo "#include \"$dir/$file\"" >> "$1.all"
+    fi
+  done
+}
+# Actualize .all files
+createGeneric %{_prefix}/include/Bpp
+exit 0
+
+%preun -n libbpp-popgen-devel
+removeGeneric() {
+  # Delete "$1.all", then do the same for every subdirectory of $1.
+  local file            # the recursive call must not clobber our loop variable
+  if [ -f "$1.all" ]; then
+    echo "-- Remove generic include file: $1.all"
+    rm "$1.all"
+  fi
+  for file in "$1"/*
+  do
+    if [ -d "$file" ]; then
+      # Recursion:
+      removeGeneric "$file"
+    fi
+  done
+}
+# Remove the generated .all files before the package's headers are uninstalled
+removeGeneric %{_prefix}/include/Bpp
+exit 0
+
+%postun -n libbpp-popgen6 -p /sbin/ldconfig
+
+%postun -n libbpp-popgen-devel
+createGeneric() {
+  # Write "$1.all", a header #including every *.h / *.all file directly in $1
+  # (subdirectories first, so their .all files exist when this level is listed).
+  local dir file
+  local -a dirs=()
+  echo "-- Creating generic include file: $1.all"
+  for file in "$1"/*
+  do
+    if [ -d "$file" ]; then
+      dirs+=( "$file" )
+    fi
+  done
+  for dir in "${dirs[@]}"
+  do
+    createGeneric "$dir"
+  done
+  #Now list all files, including newly created .all files:
+  if [ -f "$1.all" ]; then rm "$1.all"; fi
+  dir=`basename "$1"`
+  for file in "$1"/*
+  do
+    if [ -f "$file" ] && ( [ "${file##*.}" == "h" ] || [ "${file##*.}" == "all" ] ); then
+      file=`basename "$file"`
+      echo "#include \"$dir/$file\"" >> "$1.all"
+    fi
+  done
+}
+# Actualize .all files: the preun scriptlet deleted them, so rebuild them for
+# whatever headers remain under include/Bpp.
+createGeneric %{_prefix}/include/Bpp
+exit 0
+
+%files -n libbpp-popgen6
+%defattr(-,root,root)
+%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
+%{_prefix}/%{_lib}/lib*.so.*
+
+%files -n libbpp-popgen-devel
+%defattr(-,root,root)
+%doc AUTHORS.txt COPYING.txt INSTALL.txt ChangeLog
+%{_prefix}/%{_lib}/lib*.so
+%{_prefix}/%{_lib}/lib*.a
+%{_prefix}/include/*
+
+%changelog
+* Thu Mar 07 2013 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.1.0-1
+- Bug fixed and warnings removed.
+* Thu Feb 09 2012 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.0.3-1
+- Recompilation for dependencies.
+* Thu Jun 09 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.0.2-1
+- New Fst calculations + bugs fixed.
+* Mon Feb 28 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.0.1-1
+* Mon Feb 07 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.0.0-1
+* Thu Mar 25 2010 Julien Dutheil <julien.dutheil at univ-montp2.fr> 1.5.0-1
+* Wed Jun 10 2009 Julien Dutheil <jdutheil at birc.au.dk> 1.4.0-1
+* Thu Dec 11 2008 Julien Dutheil <jdutheil at birc.au.dk> 1.3.1-1
+* Mon Jul 21 2008 Julien Dutheil <jdutheil at birc.au.dk> 1.3.0-1
+* Fri Jan 18 2008 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.2.0-1
+* Fri Jul 06 2007 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.1.1-1
+- For compatibility. No more dependency for Bpp-Phyl.
+* Fri Jan 19 2007 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.1.0-2
+- Build 2 for compatibility.
+* Mon Aug 28 2006 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.1.0-1
+- Now requires Bpp-Phyl too!
+* Tue Apr 18 2006 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.0.0-2
+- Build 2 for compatibility with other libs. Added STL dependency.
+* Fri Nov 16 2005 Julien Dutheil <Julien.Dutheil at univ-montp2.fr> 1.0.0-1
+- First draft of the spec file.
+
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..0c1aec5
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,55 @@
+libbpp-popgen (2.1.0-1) unstable; urgency=low
+
+ * Bug fixed and warnings removed.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Thu, 07 Mar 2013 10:51:00 +0100
+
+libbpp-popgen (2.0.3-1) unstable; urgency=low
+
+ * Recompilation because of dependencies.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Thu, 09 Feb 2012 16:30:00 +0100
+
+libbpp-popgen (2.0.2-1) unstable; urgency=low
+
+ * RFP: Bio++ -- The Bio++ bioinformatics libraries. (Closes: #616373).
+ * Packages are now non-native.
+ * New Fst computation from Hudson, Slatkin and Maddison (Genetics, 1992).
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Thu, 09 Jun 2011 11:00:00 +0100
+
+libbpp-popgen (2.0.1) unstable; urgency=low
+
+ * Fixed copyright issue in package.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Mon, 28 Feb 2011 09:00:00 +0100
+
+libbpp-popgen (2.0.0) unstable; urgency=low
+
+ * Update for version 2.0 of Bio++. Code reorganization.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Mon, 07 Feb 2011 09:00:00 +0100
+
+libbpp-popgen (1.5.0) unstable; urgency=low
+
+ * Update for version 1.9 of Bio++. Several interface improvements and bug fixed.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Thu, 25 Mar 2010 15:14:55 +0100
+
+libbpp-popgen (1.4.0) unstable; urgency=low
+
+ * Update for version 1.8 of Bio++.
+
+ -- Julien Dutheil <jdutheil at birc.au.dk> Wed, 10 Jun 2009 11:28:58 +0100
+
+libbpp-popgen (1.3.1) unstable; urgency=low
+
+ * Bug fix release.
+
+ -- Julien Dutheil <jdutheil at daimi.au.dk> Thu, 11 Dec 2008 12:21:37 +0100
+
+libbpp-popgen (1.3.0) unstable; urgency=low
+
+ * Initial Release.
+
+ -- Julien Dutheil <jdutheil at daimi.au.dk> Mon, 21 Jul 2008 15:17:26 +0200
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7ed6ff8
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+5
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..514f887
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,25 @@
+Source: libbpp-popgen
+Section: libs
+Priority: optional
+Maintainer: Loic Dachary <loic at dachary.org>
+Uploaders: Julien Dutheil <julien.dutheil at univ-montp2.fr>
+Build-Depends: debhelper (>= 5), cmake (>= 2.6),
+ libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0)
+Standards-Version: 3.9.1
+
+Package: libbpp-popgen-dev
+Section: libdevel
+Architecture: any
+Depends: libbpp-popgen6 (= ${binary:Version}), ${misc:Depends},
+ libbpp-core-dev (>= 2.1.0), libbpp-seq-dev (>= 2.1.0)
+Description: Bio++ Population Genetics library development files.
+ Contains the Bio++ classes for population genetics.
+
+Package: libbpp-popgen6
+Section: libs
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends},
+ libbpp-core2 (>= 2.1.0), libbpp-seq9 (>= 2.1.0)
+Description: Bio++ Population Genetics library.
+ Contains the Bio++ classes for population genetics.
+
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..ff3fdc0
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,66 @@
+This package was debianized by Julien Dutheil <jdutheil at birc.au.dk> on
+Thu, 07 Mar 2013 10:51:00 +0100
+
+It was downloaded from <http://biopp.univ-montp2.fr/Repositories/sources>
+
+Upstream Author:
+
+ Julien Dutheil <julien.dutheil at univ-montp2.fr>
+
+Copyright:
+
+ Copyright (C) 2013 Bio++ Development Team
+
+License:
+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this package; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+On Debian systems, the complete text of the GNU General
+Public License can be found in `/usr/share/common-licenses/GPL'.
+
+The Debian packaging is (C) 2013, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
+is licensed under the GPL, see above.
+
+The provided software is distributed under the CeCILL license:
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+
+The complete text of the license may be found here:
+http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
+
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..e69de29
diff --git a/debian/libbpp-popgen-dev.install b/debian/libbpp-popgen-dev.install
new file mode 100644
index 0000000..7d74f2b
--- /dev/null
+++ b/debian/libbpp-popgen-dev.install
@@ -0,0 +1,3 @@
+debian/tmp/usr/include/*
+debian/tmp/usr/lib/lib*.a
+debian/tmp/usr/lib/lib*.so
diff --git a/debian/libbpp-popgen6.install b/debian/libbpp-popgen6.install
new file mode 100644
index 0000000..c45ebcf
--- /dev/null
+++ b/debian/libbpp-popgen6.install
@@ -0,0 +1 @@
+debian/tmp/usr/lib/lib*.so.*
diff --git a/debian/postinst b/debian/postinst
new file mode 100755
index 0000000..cf9e925
--- /dev/null
+++ b/debian/postinst
@@ -0,0 +1,43 @@
+#! /bin/bash
+
+# Abort if any command returns an error value
+set -e
+
+# Write "$1.all", a header with one #include per *.h / *.all file found in $1.
+createGeneric() {
+  local root="$1" entry base
+  local -a subdirs=()
+  echo "-- Creating generic include file: $root.all"
+  # Recurse into sub-directories first so their .all files exist when listed.
+  for entry in "$root"/*
+  do
+    if [ -d "$entry" ]
+    then
+      subdirs+=( "$entry" )
+    fi
+  done
+  for entry in "${subdirs[@]}"
+  do
+    createGeneric "$entry"
+  done
+  # Drop any stale file, then list headers and the sub-directory .all files.
+  if [ -f "$root.all" ]
+  then
+    rm "$root.all"
+  fi
+  base=$(basename "$root")
+  for entry in "$root"/*
+  do
+    [ -f "$entry" ] || continue
+    case "${entry##*.}" in
+      h|all) echo "#include \"$base/$(basename "$entry")\"" >> "$root.all" ;;
+    esac
+  done
+}
+
+if [ "$1" = "configure" ]; then
+ # Actualize .all files
+ createGeneric /usr/include/Bpp
+fi
+
+exit 0
diff --git a/debian/postrm b/debian/postrm
new file mode 100755
index 0000000..3931669
--- /dev/null
+++ b/debian/postrm
@@ -0,0 +1,45 @@
+#! /bin/bash
+
+# Abort if any command returns an error value
+set -e
+
+# Write "$1.all", a header with one #include per *.h / *.all file found in $1.
+createGeneric() {
+  local root="$1" entry base
+  local -a subdirs=()
+  echo "-- Creating generic include file: $root.all"
+  # Recurse into sub-directories first so their .all files exist when listed.
+  for entry in "$root"/*
+  do
+    if [ -d "$entry" ]
+    then
+      subdirs+=( "$entry" )
+    fi
+  done
+  for entry in "${subdirs[@]}"
+  do
+    createGeneric "$entry"
+  done
+  # Drop any stale file, then list headers and the sub-directory .all files.
+  if [ -f "$root.all" ]
+  then
+    rm "$root.all"
+  fi
+  base=$(basename "$root")
+  for entry in "$root"/*
+  do
+    [ -f "$entry" ] || continue
+    case "${entry##*.}" in
+      h|all) echo "#include \"$base/$(basename "$entry")\"" >> "$root.all" ;;
+    esac
+  done
+}
+
+if [ "$1" = "remove" ]; then
+ # Automatically added by dh_makeshlibs
+ ldconfig
+ # Actualize .all files
+ createGeneric /usr/include/Bpp
+fi
+
+exit 0
diff --git a/debian/prerm b/debian/prerm
new file mode 100755
index 0000000..5aefd24
--- /dev/null
+++ b/debian/prerm
@@ -0,0 +1,27 @@
+#! /bin/bash
+
+# Abort if any command returns an error value
+set -e
+
+removeGeneric() {
+ if [ -f $1.all ]
+ then
+ echo "-- Remove generic include file: $1.all"
+ rm $1.all
+ fi
+ for file in "$1"/*
+ do
+ if [ -d "$file" ]
+ then
+ # Recursion:
+ removeGeneric $file
+ fi
+ done
+}
+
+if [ "$1" = "remove" ]; then
+ # Actualize .all files
+ removeGeneric /usr/include/Bpp
+fi
+
+exit 0
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..63fdf40
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,119 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+# Sample debian/rules that uses debhelper.
+# This file was originally written by Joey Hess and Craig Small.
+# As a special exception, when this file is copied by dh-make into a
+# dh-make output file, you may use that output file without restriction.
+# This special exception was added by Craig Small in version 0.37 of dh-make.
+
+# 24/01/10 Modification for use with CMake by Julien Dutheil.
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+
+# These are used for cross-compiling and for saving the configure script
+# from having to guess our platform (since we know it already)
+DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
+DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
+
+# NOTE(review): CFLAGS is assembled below, but no rule in this file references $(CFLAGS) explicitly.
+CFLAGS = -Wall -g
+
+ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
+ CFLAGS += -O0
+else
+ CFLAGS += -O2
+endif
+
+# shared library versions (shell snippets; neither variable is referenced by a rule in this file)
+version=`ls src/lib*.so.* | \
+ awk '{if (match($$0,/[0-9]+\.[0-9]+\.[0-9]+$$/)) print substr($$0,RSTART)}'`
+major=`ls src/lib*.so.* | \
+ awk '{if (match($$0,/\.so\.[0-9]+$$/)) print substr($$0,RSTART+4)}'`
+# configure runs CMake in the source tree; build-stamp is touched once $(MAKE) has succeeded.
+configure:
+ cmake -DCMAKE_INSTALL_PREFIX=/usr .
+
+config.status: configure
+ dh_testdir
+# NOTE(review): no recipe here creates a file named config.status, so build-stamp is always out of date.
+build: build-stamp
+build-stamp: config.status
+ dh_testdir
+
+ # Add here commands to compile the package.
+ $(MAKE)
+
+ touch $@
+# clean: remove the Makefiles, CMake caches/directories and the build stamp left by an in-source build.
+clean:
+ dh_testdir
+ dh_testroot
+
+ # Add here commands to clean up after the build process.
+ [ ! -f Makefile ] || $(MAKE) clean;
+ [ ! -f Makefile ] || rm Makefile;
+ [ ! -f src/Makefile ] || rm src/Makefile;
+ rm -f config.sub config.guess
+ rm -f build-stamp
+ rm -f CMakeCache.txt
+ rm -f *.cmake
+ rm -f src/*.cmake
+ #rm -f test/*.cmake
+ rm -rf CMakeFiles
+ rm -rf src/CMakeFiles
+ #rm -rf test/CMakeFiles
+ rm -rf _CPack_Packages
+ #rm -rf Testing
+ #rm -f DartConfiguration.tcl
+
+ dh_clean
+# install: after a build, stage the "install" target of the generated Makefile into debian/tmp via DESTDIR.
+install: build
+ dh_testdir
+ dh_testroot
+ dh_prep
+ dh_installdirs
+
+ # Add here commands to install the package into debian/tmp
+ $(MAKE) DESTDIR=$(CURDIR)/debian/tmp install
+
+
+# Build architecture-independent files here.
+binary-indep: build install
+# We have nothing to do by default (this rule has no recipe).
+
+# Build architecture-dependent files here (a debhelper sequence that ends with dh_builddeb).
+binary-arch: build install
+ dh_testdir
+ dh_testroot
+ dh_installchangelogs ChangeLog
+ dh_installdocs
+ dh_installexamples
+ dh_install
+# dh_installmenu
+# dh_installdebconf
+# dh_installlogrotate
+# dh_installemacsen
+# dh_installpam
+# dh_installmime
+# dh_installinit
+# dh_installcron
+# dh_installinfo
+ dh_installman
+ dh_link
+ dh_strip
+ dh_compress
+ dh_fixperms
+# dh_perl
+# dh_python
+ dh_makeshlibs
+ dh_installdeb
+ dh_shlibdeps
+ dh_gencontrol
+ dh_md5sums
+ dh_builddeb
+# None of these targets creates a file of its own name; "configure" is therefore phony as well.
+binary: binary-indep binary-arch
+.PHONY: configure build clean binary-indep binary-arch binary install
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/src/Bpp/PopGen/AbstractIDataSet.cpp b/src/Bpp/PopGen/AbstractIDataSet.cpp
new file mode 100644
index 0000000..a31d771
--- /dev/null
+++ b/src/Bpp/PopGen/AbstractIDataSet.cpp
@@ -0,0 +1,71 @@
+//
+// File AbstractIDataSet.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "AbstractIDataSet.h"
+
+using namespace bpp;
+
+// From STL
+#include <fstream>
+
+using namespace std;
+
+AbstractIDataSet::~AbstractIDataSet() {} // empty body: nothing is released here
+
+void AbstractIDataSet::read(const std::string& path, DataSet& data_set) throw (Exception)
+{
+  ifstream input(path.c_str(), ios::in); // closed by its destructor, also when read() throws
+  if (!input) throw IOException("AbstractIDataSet::read: failed to open file: " + path);
+  read(input, data_set); // delegate to the stream overload supplied by the concrete reader
+}
+
+DataSet* AbstractIDataSet::read(std::istream& is) throw (Exception)
+{
+  DataSet* data_set = new DataSet(); // heap-allocated; handed over to the caller on success
+  try { read(is, *data_set); } catch (...) { delete data_set; throw; } // no leak if parsing throws
+  return data_set;
+}
+
+DataSet* AbstractIDataSet::read(const std::string& path) throw (Exception)
+{
+  DataSet* data_set = new DataSet(); // heap-allocated; handed over to the caller on success
+  try { read(path, *data_set); } catch (...) { delete data_set; throw; } // no leak if reading throws
+  return data_set;
+}
diff --git a/src/Bpp/PopGen/AbstractIDataSet.h b/src/Bpp/PopGen/AbstractIDataSet.h
new file mode 100644
index 0000000..30648af
--- /dev/null
+++ b/src/Bpp/PopGen/AbstractIDataSet.h
@@ -0,0 +1,80 @@
+//
+// File AbstractIDataSet.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ABSTRACTIDATASET_H_
+#define _ABSTRACTIDATASET_H_
+
+#include "IDataSet.h"
+
+#include <Bpp/Exceptions.h>
+
+namespace bpp
+{
+/**
+ * @brief Partial implementation of the DataSet Input interface
+ *
+ * @author Sylvain Gaillard
+ */
+class AbstractIDataSet :
+ public IDataSet
+{
+public:
+ // Class destructor
+ virtual ~AbstractIDataSet();
+
+public:
+ /**
+ * @name The IDataSet interface.
+ * @{
+ */
+ virtual void read(std::istream& is, DataSet& data_set) throw (Exception) = 0;
+ // Read the file at 'path' into data_set.
+ virtual void read(const std::string& path, DataSet& data_set) throw (Exception);
+ // Read 'is' into a DataSet allocated with new and return it.
+ virtual DataSet* read(std::istream& is) throw (Exception);
+ // Read the file at 'path' into a DataSet allocated with new and return it.
+ virtual DataSet* read(const std::string& path) throw (Exception);
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _ABSTRACTIDATASET_H_
diff --git a/src/Bpp/PopGen/AbstractODataSet.cpp b/src/Bpp/PopGen/AbstractODataSet.cpp
new file mode 100644
index 0000000..427d92f
--- /dev/null
+++ b/src/Bpp/PopGen/AbstractODataSet.cpp
@@ -0,0 +1,58 @@
+//
+// File AbstractODataSet.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "AbstractODataSet.h"
+
+using namespace bpp;
+
+// From STL
+#include <fstream>
+
+using namespace std;
+
+AbstractODataSet::~AbstractODataSet() {} // empty body: nothing is released here
+
+void AbstractODataSet::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception)
+{
+  ofstream output(path.c_str(), overwrite ? (ios::out) : (ios::out | ios::app)); // appends unless overwrite is set
+  if (!output) throw IOException("AbstractODataSet::write: failed to open file: " + path);
+  write(output, data_set); // stream overload of the concrete writer; the ofstream closes on scope exit
+}
+
diff --git a/src/Bpp/PopGen/AbstractODataSet.h b/src/Bpp/PopGen/AbstractODataSet.h
new file mode 100644
index 0000000..4fd6c5c
--- /dev/null
+++ b/src/Bpp/PopGen/AbstractODataSet.h
@@ -0,0 +1,73 @@
+//
+// File AbstractODataSet.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ABSTRACTODATASET_H_
+#define _ABSTRACTODATASET_H_
+
+#include "ODataSet.h"
+
+namespace bpp
+{
+/**
+ * @brief Partial implementation of the DataSet Output interface.
+ *
+ * @author Sylvain Gaillard
+ */
+class AbstractODataSet :
+ public ODataSet
+{
+public:
+ virtual ~AbstractODataSet();
+
+public:
+ /**
+ * @name The ODataSet interface.
+ * @{
+ */
+ virtual void write(std::ostream& os, const DataSet& data_set) const throw (Exception) = 0; // left to concrete writers
+ virtual void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception); // writes to the file at 'path'
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _ABSTRACTODATASET_H_
+
diff --git a/src/Bpp/PopGen/AlleleInfo.h b/src/Bpp/PopGen/AlleleInfo.h
new file mode 100644
index 0000000..c2c4c0a
--- /dev/null
+++ b/src/Bpp/PopGen/AlleleInfo.h
@@ -0,0 +1,93 @@
+//
+// File AlleleInfo.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ALLELEINFO_H_
+#define _ALLELEINFO_H_
+
+// From STL
+#include <string>
+
+#include <Bpp/Clonable.h>
+
+namespace bpp
+{
+/**
+ * @brief The AlleleInfo interface.
+ *
+ * An AlleleInfo is a data structure designed to store information about
+ * alleles in general, for example the size of the marker.
+ *
+ * @author Sylvain Gaillard
+ */
+class AlleleInfo :
+ public virtual Clonable
+{
+public:
+ // Destructor
+ virtual ~AlleleInfo() {}
+
+public:
+ // Methods
+ /**
+ * @brief Set the identifier of the allele.
+ */
+ virtual void setId(const std::string& allele_id) = 0;
+
+ /**
+ * @brief Get the identifier of the allele.
+ */
+ virtual const std::string& getId() const = 0;
+
+ /**
+ * @name The Clonable interface
+ * clone() is declared to return AlleleInfo*, or Clonable* when NO_VIRTUAL_COV is defined.
+ * @{
+ */
+#ifdef NO_VIRTUAL_COV
+ Clonable*
+#else
+ AlleleInfo*
+#endif
+ clone() const = 0;
+ /** @} */
+};
+} // end of namespace bpp;
+
+#endif // _ALLELEINFO_H_
+
diff --git a/src/Bpp/PopGen/AnalyzedLoci.cpp b/src/Bpp/PopGen/AnalyzedLoci.cpp
new file mode 100644
index 0000000..26a4e61
--- /dev/null
+++ b/src/Bpp/PopGen/AnalyzedLoci.cpp
@@ -0,0 +1,219 @@
+//
+// File AnalyzedLoci.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "AnalyzedLoci.h"
+
+using namespace bpp;
+using namespace std;
+
+/******************************************************************************/
+
+// Build a container of number_of_loci slots, every one of them empty (null).
+AnalyzedLoci::AnalyzedLoci(size_t number_of_loci) : loci_(number_of_loci)
+{
+  typedef vector<LocusInfo*>::iterator Slot;
+  for (Slot slot = loci_.begin(); slot != loci_.end(); ++slot)
+    *slot = 0;
+}
+
+/******************************************************************************/
+
+AnalyzedLoci::AnalyzedLoci(const AnalyzedLoci& analyzed_loci) : loci_(vector<LocusInfo*>(analyzed_loci.loci_.size()))
+{
+ for (size_t i = 0; i < analyzed_loci.getNumberOfLoci(); i++)
+ {
+ loci_[i] = new LocusInfo(analyzed_loci.getLocusInfoAtPosition(i));
+ }
+}
+
+/******************************************************************************/
+
+// Release every LocusInfo held by the container.
+AnalyzedLoci::~AnalyzedLoci()
+{
+  // 'delete' on a null pointer is a no-op, so empty slots need no special case.
+  for (vector<LocusInfo*>::iterator slot = loci_.begin(); slot != loci_.end(); ++slot)
+    delete *slot;
+}
+
+/******************************************************************************/
+
+// Store a copy of 'locus' at locus_position and free the LocusInfo previously held there.
+void AnalyzedLoci::setLocusInfo(size_t locus_position, const LocusInfo& locus)
+throw (IndexOutOfBoundsException)
+{
+  if (locus_position >= loci_.size()) // size_t is unsigned, so no lower-bound test is needed
+    throw IndexOutOfBoundsException("AnalyzedLoci::setLocusInfo: locus_position out of bounds",
+                                    locus_position, 0, loci_.size());
+  LocusInfo* replacement = new LocusInfo(locus); // copy first: 'locus' may be the object being replaced
+  delete loci_[locus_position];                  // no-op for a slot that was still null
+  loci_[locus_position] = replacement;
+}
+
+/******************************************************************************/
+
+// Index of the first defined locus named locus_name; BadIdentifierException if none matches.
+size_t AnalyzedLoci::getLocusInfoPosition(const std::string& locus_name) const
+throw (BadIdentifierException)
+{
+  for (size_t pos = 0; pos != loci_.size(); ++pos)
+  {
+    LocusInfo* candidate = loci_[pos]; // null while the slot is still undefined
+    if (candidate != NULL && candidate->getName() == locus_name) return pos;
+  }
+  throw BadIdentifierException("AnalyzedLoci::getLocusInfoPosition: locus not found.", locus_name);
+}
+
+/******************************************************************************/
+
+// First defined locus named locus_name, returned by reference.
+// Throws BadIdentifierException when no defined locus carries that name.
+const LocusInfo& AnalyzedLoci::getLocusInfoByName(const std::string& locus_name) const
+throw (BadIdentifierException)
+{
+  for (vector<LocusInfo*>::const_iterator slot = loci_.begin(); slot != loci_.end(); ++slot)
+  {
+    if (*slot == NULL) continue; // undefined slot: nothing to compare
+    if ((*slot)->getName() == locus_name) return **slot;
+  }
+  throw BadIdentifierException("AnalyzedLoci::getLocusInfo: locus not found.", locus_name);
+}
+
+/******************************************************************************/
+
+const LocusInfo& AnalyzedLoci::getLocusInfoAtPosition(
+ size_t locus_position) const
+throw (Exception)
+{
+ if (locus_position >= loci_.size())
+ throw IndexOutOfBoundsException("AnalyzedLoci::getLocusInfoAtPosition: locus_position out of bounds.", locus_position, 0, loci_.size());
+ if (loci_[locus_position] != NULL)
+ return *(loci_[locus_position]);
+ else
+ throw NullPointerException("AnalyzedLoci::getLocusInfo: no locus defined here.");
+}
+
+/******************************************************************************/
+
+// AlleleInfo
+// Add 'allele' to every defined locus named locus_name (null slots are skipped, not dereferenced).
+void AnalyzedLoci::addAlleleInfoByLocusName(const std::string& locus_name,
+                                            const AlleleInfo& allele)
+throw (Exception)
+{
+  bool locus_found = false;
+  for (vector<LocusInfo*>::iterator it = loci_.begin(); it != loci_.end(); it++)
+  {
+    if (*it == NULL || (*it)->getName() != locus_name)
+      continue;
+    locus_found = true;
+    try
+    {
+      (*it)->addAlleleInfo(allele);
+    }
+    catch (BadIdentifierException& bie)
+    {
+      // Re-throw with a message naming this method; the offending identifier is kept.
+      throw BadIdentifierException("AnalyzedLoci::addAlleleInfoByLocusName: allele id already in use.", bie.getIdentifier());
+    }
+  }
+  if (!locus_found)
+    throw LocusNotFoundException("AnalyzedLoci::addAlleleInfoByLocusName: locus_name not found.", locus_name);
+}
+
+/******************************************************************************/
+
+// Add 'allele' to the locus stored at locus_position.
+// Throws IndexOutOfBoundsException, NullPointerException (undefined slot) or BadIdentifierException.
+void AnalyzedLoci::addAlleleInfoByLocusPosition(size_t locus_position,
+                                                const AlleleInfo& allele)
+throw (Exception)
+{
+  if (locus_position >= loci_.size())
+    throw IndexOutOfBoundsException("AnalyzedLoci::addAlleleInfoByLocusPosition: locus_position out of bounds.", locus_position, 0, loci_.size());
+  if (loci_[locus_position] == NULL)
+    throw NullPointerException("AnalyzedLoci::addAlleleInfoByLocusPosition: no locus defined here.");
+  try
+  {
+    loci_[locus_position]->addAlleleInfo(allele);
+  }
+  catch (BadIdentifierException& bie)
+  {
+    throw BadIdentifierException("AnalyzedLoci::addAlleleInfoByLocusPosition: allele id is already in use.", bie.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+std::vector<size_t> AnalyzedLoci::getNumberOfAlleles() const
+{
+ vector<size_t> allele_count;
+ for (size_t i = 0; i < loci_.size(); i++)
+ {
+ allele_count.push_back(loci_[i]->getNumberOfAlleles());
+ }
+ return allele_count;
+}
+
+/******************************************************************************/
+
+// Ploidy of the first defined locus named locus_name; LocusNotFoundException if none matches.
+unsigned int AnalyzedLoci::getPloidyByLocusName(const std::string& locus_name) const
+throw (LocusNotFoundException)
+{
+  for (size_t pos = 0; pos != loci_.size(); ++pos)
+  {
+    LocusInfo* candidate = loci_[pos];
+    if (candidate != NULL && candidate->getName() == locus_name) return candidate->getPloidy();
+  }
+  throw LocusNotFoundException("AnalyzedLoci::getLocusInfo: locus_name not found.", locus_name);
+}
+
+/******************************************************************************/
+
+unsigned int AnalyzedLoci::getPloidyByLocusPosition(size_t locus_position) const
+throw (IndexOutOfBoundsException)
+{
+ if (locus_position >= loci_.size())
+ throw IndexOutOfBoundsException("AnalyzedLoci::getPloidyByLocusPosition: locus_position out of bounds.", locus_position, 0, loci_.size());
+ return loci_[locus_position]->getPloidy();
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/AnalyzedLoci.h b/src/Bpp/PopGen/AnalyzedLoci.h
new file mode 100644
index 0000000..a8e1be6
--- /dev/null
+++ b/src/Bpp/PopGen/AnalyzedLoci.h
@@ -0,0 +1,170 @@
+//
+// File AnalyzedLoci.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ANALYZEDLOCI_H_
+#define _ANALYZEDLOCI_H_
+
+// From STL
+#include <vector>
+#include <string>
+
+#include <Bpp/Exceptions.h>
+
+// From local
+#include "LocusInfo.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief The AnalyzedLoci class.
+ *
+ * This is a LocusInfo container.
+ * Its instantiation requires a number of loci which is fixed
+ * and can't be modified.
+ *
+ * @author Sylvain Gaillard
+ */
+class AnalyzedLoci
+{
+private:
+ std::vector<LocusInfo*> loci_;
+
+public:
+ // Constructors and Destructor
+ /**
+ * @brief Build an empty AnalyzedLoci with a specific number of loci.
+ */
+ AnalyzedLoci(size_t number_of_loci);
+
+ /**
+ * @brief Copy constructor.
+ */
+ AnalyzedLoci(const AnalyzedLoci& analyzed_loci);
+ // NOTE(review): no copy-assignment operator is declared; the implicit one would copy the raw pointers in loci_.
+ /**
+ * @brief Destroy the AnalyzedLoci.
+ */
+ ~AnalyzedLoci();
+
+public:
+ // Other methods
+ /**
+ * @brief Set a LocusInfo.
+ *
+ * @throw IndexOutOfBoundsException if locus_position is out of bounds.
+ */
+ void setLocusInfo(size_t locus_position, const LocusInfo& locus)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the position of a LocusInfo.
+ *
+ * @throw BadIdentifierException if locus_name is not found.
+ */
+ size_t getLocusInfoPosition(const std::string& locus_name) const
+ throw (BadIdentifierException);
+
+ /**
+ * @brief Get a LocusInfo by name.
+ *
+ * @throw BadIdentifierException if locus_name is not found.
+ */
+ const LocusInfo& getLocusInfoByName(const std::string& locus_name) const
+ throw (BadIdentifierException);
+
+ /**
+ * @brief Get a LocusInfo by its position.
+ *
+ * @throw NullPointerException if the LocusInfo is not defined.
+ * @throw IndexOutOfBoundsException if locus_position is out of bounds.
+ */
+ const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const
+ throw (Exception);
+
+ /**
+ * @brief Add an AlleleInfo to a LocusInfo by LocusInfo name.
+ *
+ * @throw BadIdentifierException if the allele's id is already in use.
+ * @throw LocusNotFoundException if locus_name is not found.
+ */
+ void addAlleleInfoByLocusName(const std::string& locus_name,
+ const AlleleInfo& allele)
+ throw (Exception);
+
+ /**
+ * @brief Add an AlleleInfo to a LocusInfo by its position.
+ *
+ * @throw BadIdentifierException if the allele's id is already in use.
+ * @throw IndexOutOfBoundsException if locus_position is out of bounds.
+ */
+ void addAlleleInfoByLocusPosition(size_t locus_position,
+ const AlleleInfo& allele)
+ throw (Exception);
+
+ /**
+ * @brief Get the number of loci.
+ */
+ size_t getNumberOfLoci() const { return loci_.size(); }
+
+ /**
+ * @brief Get the number of alleles at each locus.
+ */
+ std::vector<size_t> getNumberOfAlleles() const;
+
+ /**
+ * @brief Get the ploidy of a locus by name.
+ *
+ * @throw LocusNotFoundException if locus_name is not found.
+ */
+ unsigned int getPloidyByLocusName(const std::string& locus_name) const
+ throw (LocusNotFoundException);
+
+ /**
+ * @brief Get the ploidy of a locus by its position.
+ *
+ * @throw IndexOutOfBoundsException if locus_position is out of bounds.
+ */
+ unsigned int getPloidyByLocusPosition(size_t locus_position) const
+ throw (IndexOutOfBoundsException);
+};
+} // end of namespace bpp;
+
+#endif // _ANALYZEDLOCI_H_
+
diff --git a/src/Bpp/PopGen/AnalyzedSequences.cpp b/src/Bpp/PopGen/AnalyzedSequences.cpp
new file mode 100644
index 0000000..3a41892
--- /dev/null
+++ b/src/Bpp/PopGen/AnalyzedSequences.cpp
@@ -0,0 +1,128 @@
+//
+// File AnalyzedSequences.cpp
+// Created by: Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "AnalyzedSequences.h"
+#include <Bpp/Seq/Alphabet/DNA.h>
+#include <Bpp/Seq/Alphabet/RNA.h>
+#include <Bpp/Seq/Alphabet/ProteicAlphabet.h>
+
+using namespace bpp;
+using namespace std;
+
+// Default constructor: no alphabet is attached (null pointer) and nothing is
+// owned (autoset_ is false).
+AnalyzedSequences::AnalyzedSequences() : alphabet_(0),
+ autoset_(false) {}
+
+// Build from a caller-provided alphabet. autoset_ stays false, so clear_()
+// never deletes this pointer: the caller keeps ownership.
+AnalyzedSequences::AnalyzedSequences(const Alphabet* alpha) : alphabet_(alpha),
+ autoset_(false) {}
+
+// Destructor: delegates to clear_(), which deletes the alphabet only when it
+// is non-null and autoset_ is true.
+AnalyzedSequences::~AnalyzedSequences()
+{
+ clear_();
+}
+
+/*
+ * Copy constructor.
+ *
+ * When the source created its alphabet itself (autoset_ is true), a fresh
+ * alphabet of the same type is built through setAlphabet(const std::string&),
+ * so that each object deletes its own instance. Otherwise only the pointer is
+ * copied and its deletion remains the caller's responsibility.
+ */
+AnalyzedSequences::AnalyzedSequences(const AnalyzedSequences& as) : alphabet_(0),
+  autoset_(false)
+{
+  const bool sourceOwnsAlphabet = as.autoset_;
+  if (!sourceOwnsAlphabet)
+    alphabet_ = as.alphabet_;
+  else
+    setAlphabet(as.getAlphabetType());
+  autoset_ = sourceOwnsAlphabet;
+}
+
+/*
+ * Assignment operator.
+ *
+ * Follows the same ownership rule as the copy constructor: if the source owns
+ * its alphabet (autoset_ is true) a new alphabet of the same type is created
+ * for this object; otherwise only the pointer is copied.
+ *
+ * Any alphabet this object owned before the assignment is released first, and
+ * self-assignment is a no-op.
+ */
+AnalyzedSequences& AnalyzedSequences::operator=(const AnalyzedSequences& as)
+{
+  if (this == &as)
+    return *this;
+  if (as.autoset_)
+  {
+    // Read the type before releasing anything, then rebuild our own instance.
+    const std::string alpha_type = as.getAlphabetType();
+    clear_();
+    setAlphabet(alpha_type);
+  }
+  else
+  {
+    // Do not delete the alphabet if the source borrows the very same pointer:
+    // that would leave both objects with a dangling pointer.
+    if (alphabet_ != as.alphabet_)
+      clear_();
+    alphabet_ = as.alphabet_;
+  }
+  autoset_ = as.autoset_;
+  return *this;
+}
+
+/*
+ * Attach a caller-owned alphabet.
+ *
+ * The object never deletes a pointer set through this method (autoset_ is
+ * reset to false). An alphabet previously created by this object is released
+ * before being replaced, unless the caller passes that very same pointer.
+ */
+void AnalyzedSequences::setAlphabet(const Alphabet* alpha)
+{
+  if (alpha != alphabet_)
+    clear_();
+  alphabet_ = alpha;
+  autoset_ = false;
+}
+
+/*
+ * Create and attach an alphabet from its type name.
+ *
+ * Accepted names are "DNA", "RNA" and "PROTEIN". The created alphabet is owned
+ * by this object (autoset_ is set to true) and deleted by clear_().
+ *
+ * The new alphabet is built before the previous one is released, so the object
+ * is left untouched when the name is rejected.
+ *
+ * @throw Exception if alpha_type is not one of the accepted names.
+ */
+void AnalyzedSequences::setAlphabet(const std::string& alpha_type) throw (Exception)
+{
+  Alphabet* alpha = 0;
+  if (alpha_type == "DNA")
+    alpha = new DNA();
+  else if (alpha_type == "RNA")
+    alpha = new RNA();
+  else if (alpha_type == "PROTEIN")
+    alpha = new ProteicAlphabet();
+  else
+    throw Exception(string("AnalyzedSequences::setAlphabet: bad alphabet type. (") + alpha_type + string(")."));
+  // Release an alphabet this object created earlier; a borrowed one is left alone.
+  clear_();
+  alphabet_ = alpha;
+  autoset_ = true;
+}
+
+/*
+ * Get a short name for the alphabet type.
+ *
+ * Returns "---" when no alphabet is set. Otherwise returns the first
+ * space-delimited word of the alphabet's own description, with "Proteic"
+ * mapped to "PROTEIN" so the result can be passed back to
+ * setAlphabet(const std::string&).
+ */
+std::string AnalyzedSequences::getAlphabetType() const
+{
+  if (alphabet_ == 0)
+    return string("---");
+  string alpha_type = alphabet_->getAlphabetType();
+  // substr() clamps the length, so a description without any space (find
+  // returns npos) is kept whole instead of building an out-of-range iterator.
+  alpha_type = alpha_type.substr(0, alpha_type.find(' '));
+  if (alpha_type == "Proteic")
+    alpha_type = "PROTEIN";
+  return alpha_type;
+}
+
+/*
+ * Release the alphabet when, and only when, this object created it.
+ * A borrowed alphabet (autoset_ false) is left untouched.
+ */
+void AnalyzedSequences::clear_()
+{
+  const bool ownsAlphabet = autoset_ && alphabet_ != 0;
+  if (!ownsAlphabet)
+    return;
+  delete alphabet_;
+  alphabet_ = 0;
+  autoset_ = false;
+}
diff --git a/src/Bpp/PopGen/AnalyzedSequences.h b/src/Bpp/PopGen/AnalyzedSequences.h
new file mode 100644
index 0000000..56f4471
--- /dev/null
+++ b/src/Bpp/PopGen/AnalyzedSequences.h
@@ -0,0 +1,119 @@
+//
+// File AnalyzedSequences.h
+// Created by: Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ANALYZEDSEQUENCES_H_
+#define _ANALYZEDSEQUENCES_H_
+
+// From Seq
+#include <Bpp/Seq/Alphabet/Alphabet.h>
+
+namespace bpp
+{
+/**
+ * @brief The AnalyzedSequences class.
+ *
+ * This is a class to store info about the sequences.
+ *
+ * The object stores a pointer toward a const Alphabet.
+ * The way the pointer is managed depends on the method used to set it.
+ *
+ * If one uses a method taking a const Alphabet* to set the Alphabet, then the
+ * caller has to take care of the memory management (i.e. freeing the Alphabet
+ * object).
+ *
+ * If one uses a method that creates an Alphabet object, like those taking a
+ * string description of the Alphabet, then the AnalyzedSequences object will
+ * itself delete the Alphabet object on destruction.
+ *
+ * Be careful when copying an AnalyzedSequences object: the way the
+ * Alphabet object is managed is also copied. If the initial
+ * AnalyzedSequences takes care of its Alphabet member, then the copy will hold
+ * its own Alphabet of the same type and manage it; otherwise the new
+ * AnalyzedSequences will just copy the pointer and it's up to the user to take
+ * care of its deletion.
+ *
+ * @author Sylvain Gaillard
+ */
+class AnalyzedSequences
+{
+private:
+ // Alphabet used by the sequences; may be null when none has been set.
+ const Alphabet* alphabet_;
+ // True when alphabet_ was created by setAlphabet(const std::string&) and
+ // must therefore be deleted by this object.
+ bool autoset_;
+
+public:
+ // Constructor and destructor
+ AnalyzedSequences();
+ AnalyzedSequences(const Alphabet* alpha);
+ ~AnalyzedSequences();
+
+ // Copy constructor and assignment operator
+ AnalyzedSequences(const AnalyzedSequences& as);
+ AnalyzedSequences& operator=(const AnalyzedSequences& as);
+
+public:
+ /**
+ * @brief Set the alphabet used for the sequences.
+ *
+ * The alphabet is not owned by this object: the caller must free it.
+ */
+ void setAlphabet(const Alphabet* alpha);
+
+ /**
+ * @brief Set the alphabet used for the sequences by alphabet type.
+ *
+ * @param alpha_type One of "DNA", "RNA" or "PROTEIN".
+ * @throw Exception if alpha_type is not one of the accepted names.
+ */
+ void setAlphabet(const std::string& alpha_type) throw (Exception);
+
+ /**
+ * @brief Get the alphabet.
+ *
+ * @return The stored pointer, which is null if no alphabet has been set.
+ */
+ const Alphabet* getAlphabet() const
+ {
+ return alphabet_;
+ }
+
+ /**
+ * @brief Get the alphabet type as a string.
+ *
+ * @return "---" if no alphabet is set.
+ */
+ std::string getAlphabetType() const;
+
+private:
+ // Delete the alphabet if this object created it.
+ void clear_();
+};
+} // end of namespace bpp;
+
+#endif // _ANALYZEDSEQUENCES_H_
+
diff --git a/src/Bpp/PopGen/BasicAlleleInfo.cpp b/src/Bpp/PopGen/BasicAlleleInfo.cpp
new file mode 100644
index 0000000..0962d0f
--- /dev/null
+++ b/src/Bpp/PopGen/BasicAlleleInfo.cpp
@@ -0,0 +1,81 @@
+//
+// File BasicAlleleInfo.cpp
+// Author : Sylvain Gaillard <yragael2001 at yahoo.fr>
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "BasicAlleleInfo.h"
+
+using namespace bpp;
+
+// ** Class constructor: *******************************************************/
+
+// Build an allele whose identifier is a copy of id.
+BasicAlleleInfo::BasicAlleleInfo(const std::string& id) : id_(id) {}
+
+// Copy constructor: the identifier is read through the source's getId().
+BasicAlleleInfo::BasicAlleleInfo(const BasicAlleleInfo& allele) : id_(allele.getId()) {}
+
+// ** Class destructor: *******************************************************/
+
+// Nothing to release explicitly: id_ is a std::string member.
+BasicAlleleInfo::~BasicAlleleInfo() {}
+
+// ** Other methods: **********************************************************/
+
+// Assignment: copies the identifier of the other allele and returns *this.
+BasicAlleleInfo& BasicAlleleInfo::operator=(const BasicAlleleInfo& allele)
+{
+ id_ = allele.getId();
+ return *this;
+}
+
+// Two alleles are equal when their identifiers are equal strings.
+bool BasicAlleleInfo::operator==(const BasicAlleleInfo& allele) const
+{
+ return id_ == allele.getId();
+}
+
+// Two alleles differ when their identifiers are different strings.
+bool BasicAlleleInfo::operator!=(const BasicAlleleInfo& allele) const
+{
+  const std::string& other_id = allele.getId();
+  return id_ != other_id;
+}
+
+// Replace the identifier with a copy of allele_id.
+void BasicAlleleInfo::setId(const std::string& allele_id)
+{
+ id_ = allele_id;
+}
+
+// Return a reference to the stored identifier; it refers to the id_ member,
+// so it is only valid while this object is alive.
+const std::string& BasicAlleleInfo::getId() const
+{
+ return id_;
+}
+
diff --git a/src/Bpp/PopGen/BasicAlleleInfo.h b/src/Bpp/PopGen/BasicAlleleInfo.h
new file mode 100644
index 0000000..f74b940
--- /dev/null
+++ b/src/Bpp/PopGen/BasicAlleleInfo.h
@@ -0,0 +1,117 @@
+//
+// File BasicAlleleInfo.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _BASICALLELEINFO_H_
+#define _BASICALLELEINFO_H_
+
+// From local Pop
+#include "AlleleInfo.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief The BasicAlleleInfo class.
+ *
+ * This is the simplest allele class implementation which contains just an identifier.
+ *
+ * @author Sylvain Gaillard
+ */
+class BasicAlleleInfo :
+ public AlleleInfo
+{
+private:
+ // Identifier of the allele.
+ std::string id_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a new allele.
+ *
+ * @param id The identifier of the allele.
+ */
+ BasicAlleleInfo(const std::string& id);
+
+ /**
+ * @brief The BasicAlleleInfo copy constructor.
+ */
+ BasicAlleleInfo(const BasicAlleleInfo& allele);
+
+ virtual ~BasicAlleleInfo();
+
+public:
+ // Methods
+ /**
+ * @brief The assignment operator.
+ */
+ virtual BasicAlleleInfo& operator=(const BasicAlleleInfo& allele);
+
+ /**
+ * @brief The == operator.
+ */
+ virtual bool operator==(const BasicAlleleInfo& allele) const;
+
+ /**
+ * @brief The != operator.
+ */
+ virtual bool operator!=(const BasicAlleleInfo& allele) const;
+
+ /**
+ * @name The Clonable interface
+ * @{
+ */
+#ifdef NO_VIRTUAL_COV
+ Clonable*
+#else
+ BasicAlleleInfo*
+#endif
+ clone() const { return new BasicAlleleInfo(*this); }
+ /** @} */
+
+ /**
+ * @name The AlleleInfo interface
+ * @{
+ */
+ void setId(const std::string& allele_id);
+ const std::string& getId() const;
+ /** @} */
+};
+} // end of namespace bpp;
+
+#endif // _BASICALLELEINFO_H_
+
diff --git a/src/Bpp/PopGen/BiAlleleMonolocusGenotype.cpp b/src/Bpp/PopGen/BiAlleleMonolocusGenotype.cpp
new file mode 100644
index 0000000..56bb4ea
--- /dev/null
+++ b/src/Bpp/PopGen/BiAlleleMonolocusGenotype.cpp
@@ -0,0 +1,119 @@
+//
+// File BiAlleleMonolocusGenotype.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "BiAlleleMonolocusGenotype.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Build a genotype from its two allele indices, stored in the order given.
+BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype(
+  size_t first_allele_index,
+  size_t second_allele_index) : allele_index_(2)
+{
+  allele_index_.front() = first_allele_index;
+  allele_index_.back() = second_allele_index;
+}
+
+// Build a genotype from a vector that must hold exactly two allele indices.
+// Throws BadSizeException (carrying the received and the expected size)
+// for any other length.
+BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype(std::vector<size_t> allele_index) throw (BadSizeException) : allele_index_(2)
+{
+  const size_t given = allele_index.size();
+  if (given != 2)
+    throw BadSizeException("BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype: allele_index must contain two values.", given, 2);
+  allele_index_.front() = allele_index.front();
+  allele_index_.back() = allele_index.back();
+}
+
+// Copy constructor: takes the two allele indices reported by the source's
+// getAlleleIndex().
+BiAlleleMonolocusGenotype::BiAlleleMonolocusGenotype(const BiAlleleMonolocusGenotype& bmg) : allele_index_(2)
+{
+  const vector<size_t> source = bmg.getAlleleIndex();
+  allele_index_[0] = source[0];
+  allele_index_[1] = source[1];
+}
+
+// ** Class destructor: ********************************************************/
+
+// Destructor. Empties the index vector explicitly; the vector member is
+// destroyed right after in any case.
+BiAlleleMonolocusGenotype::~BiAlleleMonolocusGenotype()
+{
+ allele_index_.clear();
+}
+
+// ** Other methods: ***********************************************************/
+
+/*
+ * Assignment operator.
+ *
+ * Overwrites the two stored allele indices with those of bmg. The indices are
+ * replaced, not appended: after the call the vector holds exactly two values,
+ * so getFirstAlleleIndex(), getSecondAlleleIndex() and operator== reflect the
+ * assigned genotype.
+ */
+BiAlleleMonolocusGenotype& BiAlleleMonolocusGenotype::operator=(const BiAlleleMonolocusGenotype& bmg)
+{
+  // getAlleleIndex() returns a copy, which also makes self-assignment safe.
+  const vector<size_t> source = bmg.getAlleleIndex();
+  allele_index_.resize(2);
+  allele_index_[0] = source[0];
+  allele_index_[1] = source[1];
+  return *this;
+}
+
+// Two genotypes are equal when they hold the same pair of allele indices,
+// regardless of the order in which the two alleles are stored.
+bool BiAlleleMonolocusGenotype::operator==(const BiAlleleMonolocusGenotype& bmg) const
+{
+  const vector<size_t> other = bmg.getAlleleIndex();
+  const bool same_order = allele_index_[0] == other[0] && allele_index_[1] == other[1];
+  const bool swapped = allele_index_[0] == other[1] && allele_index_[1] == other[0];
+  return same_order || swapped;
+}
+
+// Return the allele index stored in slot 0.
+size_t BiAlleleMonolocusGenotype::getFirstAlleleIndex() const
+{
+ return allele_index_[0];
+}
+
+// Return the allele index stored in slot 1.
+size_t BiAlleleMonolocusGenotype::getSecondAlleleIndex() const
+{
+ return allele_index_[1];
+}
+
+// A genotype is homozygous when both stored allele indices are identical.
+bool BiAlleleMonolocusGenotype::isHomozygous() const
+{
+ return allele_index_[0] == allele_index_[1];
+}
+
+// Return the stored allele indices by value (each call copies the vector).
+std::vector<size_t> BiAlleleMonolocusGenotype::getAlleleIndex() const
+{
+ return allele_index_;
+}
+
+// Return a heap-allocated copy built with the copy constructor; the raw
+// pointer is handed to the caller.
+BiAlleleMonolocusGenotype* BiAlleleMonolocusGenotype::clone() const
+{
+ return new BiAlleleMonolocusGenotype(*this);
+}
+
diff --git a/src/Bpp/PopGen/BiAlleleMonolocusGenotype.h b/src/Bpp/PopGen/BiAlleleMonolocusGenotype.h
new file mode 100644
index 0000000..fd4bfe5
--- /dev/null
+++ b/src/Bpp/PopGen/BiAlleleMonolocusGenotype.h
@@ -0,0 +1,133 @@
+//
+// File BiAlleleMonolocusGenotype.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+// Secured inclusion of header's file
+#ifndef _BIALLELEMONOLOCUSGENOTYPE_H_
+#define _BIALLELEMONOLOCUSGENOTYPE_H_
+
+// From STL
+#include <vector>
+
+#include <Bpp/Exceptions.h>
+
+// From local
+#include "MonolocusGenotype.h"
+
+namespace bpp
+{
+/**
+ * @brief The BiAlleleMonolocusGenotype class.
+ *
+ * A monolocus genotype made of two allele indices.
+ *
+ * @author Sylvain Gaillard
+ */
+class BiAlleleMonolocusGenotype :
+ public MonolocusGenotype
+{
+private:
+ // The two allele indices, in the order they were given.
+ std::vector<size_t> allele_index_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a monolocus genotype containing two alleles.
+ */
+ BiAlleleMonolocusGenotype(size_t first_allele_index,
+ size_t second_allele_index);
+
+ /**
+ * @brief Build a monolocus genotype containing two alleles.
+ *
+ * @throw BadSizeException if allele_index does not contain exactly two values.
+ */
+ BiAlleleMonolocusGenotype(std::vector<size_t> allele_index) throw (BadSizeException);
+
+ /**
+ * @brief Copy constructor.
+ */
+ BiAlleleMonolocusGenotype(const BiAlleleMonolocusGenotype& bmg);
+
+ /**
+ * @brief Destroy the BiAlleleMonolocusGenotype.
+ */
+ ~BiAlleleMonolocusGenotype();
+
+public:
+ // Other methods
+ /**
+ * @brief The assignment operator.
+ */
+ BiAlleleMonolocusGenotype& operator=(const BiAlleleMonolocusGenotype& bmg);
+
+ /**
+ * @brief The == operator.
+ *
+ * The comparison does not depend on the order of the two alleles.
+ */
+ bool operator==(const BiAlleleMonolocusGenotype& bmg) const;
+
+ /**
+ * @brief Get the first allele index.
+ */
+ size_t getFirstAlleleIndex() const;
+
+ /**
+ * @brief Get the second allele index.
+ */
+ size_t getSecondAlleleIndex() const;
+
+ /**
+ * @brief Test the homozygosity of the locus.
+ */
+ bool isHomozygous() const;
+
+ /**
+ * @name The MonolocusGenotype interface:
+ *
+ * @{
+ */
+ std::vector<size_t> getAlleleIndex() const;
+ /** @} */
+
+ /**
+ * @name The Clonable interface:
+ *
+ * @{
+ */
+ BiAlleleMonolocusGenotype* clone() const;
+ /** @} */
+};
+} // end of namespace bpp;
+
+#endif // _BIALLELEMONOLOCUSGENOTYPE_H_
+
diff --git a/src/Bpp/PopGen/DarwinDon.cpp b/src/Bpp/PopGen/DarwinDon.cpp
new file mode 100644
index 0000000..7182c69
--- /dev/null
+++ b/src/Bpp/PopGen/DarwinDon.cpp
@@ -0,0 +1,82 @@
+//
+// File DarwinDon.cpp
+// Authors : Sylvain Gaillard
+// Last modification : April 7, 2008
+//
+
+/*
+ Copyright or © or Copr. CNRS, (April 7, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "DarwinDon.h"
+
+#include <Bpp/Io/OutputStream.h>
+
+using namespace bpp;
+using namespace std;
+
+// Default constructor: performs no initialization of its own.
+DarwinDon::DarwinDon() {}
+
+// Destructor: nothing to release here.
+DarwinDon::~DarwinDon() {}
+
+/*
+ * Write the individuals of a data set in Darwin .don format.
+ *
+ * Output layout:
+ *  - the "@DARwin 5.0 - DON" signature line;
+ *  - the total number of individuals and the number of descriptive columns;
+ *  - the tab-separated column header;
+ *  - one line per individual: its unit number and its id.
+ *
+ * Unit numbers run from 1 to the total number of individuals, following the
+ * order of the groups and of the individuals inside each group. A running
+ * counter is used so the numbering stays contiguous and unique even when the
+ * groups do not all have the same size.
+ *
+ * @throw IOException if the stream is in a failed state.
+ */
+void DarwinDon::write(ostream& os, const DataSet& data_set) const throw (Exception)
+{
+  if (!os)
+    throw IOException("DarwinDon::write: fail to open stream.");
+  StlOutputStreamWrapper out(&os);
+  (out << "@DARwin 5.0 - DON").endLine();
+  size_t ind_nbr = 0;
+  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+  {
+    ind_nbr += data_set.getNumberOfIndividualsInGroup(i);
+  }
+  vector<string> header;
+  header.push_back("N°");
+  header.push_back("Name");
+  // The first header column is the unit number itself, hence the "- 1".
+  (out << ind_nbr << "\t" << header.size() - 1).endLine();
+  VectorTools::print(header, out, "\t");
+  size_t ind_index = 0;
+  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+  {
+    size_t ind_nbr_ig = data_set.getNumberOfIndividualsInGroup(i);
+    for (size_t j = 0; j < ind_nbr_ig; j++)
+    {
+      ind_index++;
+      (out << ind_index << "\t" << data_set.getIndividualAtPositionFromGroup(i, j)->getId()).endLine();
+    }
+  }
+}
+
+// Path-based overload: forwards unchanged to AbstractODataSet::write().
+void DarwinDon::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception)
+{
+ AbstractODataSet::write(path, data_set, overwrite);
+}
+
diff --git a/src/Bpp/PopGen/DarwinDon.h b/src/Bpp/PopGen/DarwinDon.h
new file mode 100644
index 0000000..cb9bd86
--- /dev/null
+++ b/src/Bpp/PopGen/DarwinDon.h
@@ -0,0 +1,96 @@
+//
+// File DarwinDon.h
+// Author : Sylvain Gaillard
+// Last modification : April 7, 2008
+//
+
+/*
+ Copyright or © or Copr. CNRS, (April 7, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _DARWIN_DON_H_
+#define _DARWIN_DON_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+
+// From local Pop
+#include "AbstractODataSet.h"
+
+namespace bpp
+{
+/**
+ * @brief The Darwin .don output format for popgenlib.
+ *
+ * This writer only produces output: it lists the individuals of a DataSet,
+ * one line per individual, with a unit number and the individual's id.
+ *
+ * @author Sylvain Gaillard
+ */
+class DarwinDon :
+ public virtual AbstractODataSet
+{
+public:
+ // Constructor and destructor
+ DarwinDon();
+ ~DarwinDon();
+
+public:
+ /**
+ * @name The ODataSet interface.
+ * @{
+ */
+ // Write data_set to an already opened stream.
+ void write(std::ostream& os, const DataSet& data_set) const throw (Exception);
+ // Write data_set to the file at path (forwards to AbstractODataSet::write).
+ void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The IOFormat interface
+ * @{
+ */
+ // Short name of the format.
+ const std::string getFormatName() const
+ {
+ return "Darwin .don";
+ }
+ // One-sentence description of the format.
+ const std::string getFormatDescription() const
+ {
+ return "Darwin .don file store data identifying individuals.";
+ }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _DARWIN_DON_H_
+
diff --git a/src/Bpp/PopGen/DarwinVarSingle.cpp b/src/Bpp/PopGen/DarwinVarSingle.cpp
new file mode 100644
index 0000000..55936e6
--- /dev/null
+++ b/src/Bpp/PopGen/DarwinVarSingle.cpp
@@ -0,0 +1,114 @@
+//
+// File DarwinVarSingle.cpp
+// Authors : Sylvain Gaillard
+// Last modification : April 7, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 7, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "DarwinVarSingle.h"
+
+using namespace bpp;
+using namespace std;
+
+// Build a writer; missingData is the value written for every allele column of
+// a locus whose genotype is missing.
+DarwinVarSingle::DarwinVarSingle(size_t missingData) : missingData_(missingData) {}
+
+// Destructor: nothing to release here.
+DarwinVarSingle::~DarwinVarSingle() {}
+
+/*
+ * Write the genotypes of a data set in Darwin .var "single" format.
+ *
+ * Output layout:
+ *  - the "@DARwin 5.0 - SINGLE" signature line;
+ *  - the total number of individuals and the number of variables;
+ *  - the tab-separated header: "Unit", then one "<locus>.<allele id>" column
+ *    per allele of each locus;
+ *  - one line per individual: its unit number followed, for every allele
+ *    column, by 1 if the individual carries the allele, 0 if not, or the
+ *    missing-data value when the genotype at that locus is missing.
+ *
+ * Unit numbers run from 1 to the total number of individuals, following the
+ * order of the groups and of the individuals inside each group. A running
+ * counter is used so the numbering stays contiguous and unique even when the
+ * groups do not all have the same size.
+ *
+ * @throw IOException if the stream is in a failed state.
+ */
+void DarwinVarSingle::write(ostream& os, const DataSet& data_set) const throw (Exception)
+{
+  if (!os)
+    throw IOException("DarwinVarSingle::write: fail to open stream.");
+  StlOutputStreamWrapper out(&os);
+  (out << "@DARwin 5.0 - SINGLE").endLine();
+  size_t ind_nbr = 0;
+  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+  {
+    ind_nbr += data_set.getNumberOfIndividualsInGroup(i);
+  }
+  vector<string> header;
+  header.push_back("Unit");
+  for (size_t i = 0; i < data_set.getNumberOfLoci(); i++)
+  {
+    const LocusInfo& li = data_set.getLocusInfoAtPosition(i);
+    for (size_t j = 0; j < li.getNumberOfAlleles(); j++)
+    {
+      header.push_back(li.getName() + "." + li.getAlleleInfoByKey(j).getId());
+    }
+  }
+  // "Unit" is not a variable, hence the "- 1".
+  size_t var_nbr = header.size() - 1;
+  (out << ind_nbr << "\t" << var_nbr).endLine();
+  VectorTools::print(header, out, "\t");
+  const AnalyzedLoci* al = data_set.getAnalyzedLoci();
+  size_t ind_index = 0;
+  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+  {
+    size_t ind_nbr_ig = data_set.getNumberOfIndividualsInGroup(i);
+    for (size_t j = 0; j < ind_nbr_ig; j++)
+    {
+      vector<size_t> var;
+      const MultilocusGenotype& geno = data_set.getIndividualAtPositionFromGroup(i, j)->getGenotype();
+      for (size_t k = 0; k < geno.size(); k++)
+      {
+        // getNumberOfAlleles() returns a vector by value: query it once per
+        // locus instead of once per loop test.
+        const size_t allele_nbr = al->getNumberOfAlleles()[k];
+        if (geno.isMonolocusGenotypeMissing(k))
+        {
+          for (size_t l = 0; l < allele_nbr; l++)
+          {
+            var.push_back(missingData_);
+          }
+        }
+        else
+        {
+          // Only fetch the monolocus genotype once it is known to be present.
+          const vector<size_t> carried = geno.getMonolocusGenotype(k).getAlleleIndex();
+          for (size_t l = 0; l < allele_nbr; l++)
+          {
+            size_t flag = VectorTools::contains(carried, l) ? 1 : 0;
+            var.push_back(flag);
+          }
+        }
+      }
+      ind_index++;
+      (out << ind_index << "\t" << VectorTools::paste(var, "\t")).endLine();
+    }
+  }
+}
+
+// Path-based overload: forwards unchanged to AbstractODataSet::write().
+void DarwinVarSingle::write(const string& path, const DataSet& data_set, bool overwrite) const throw (Exception)
+{
+ AbstractODataSet::write(path, data_set, overwrite);
+}
+
diff --git a/src/Bpp/PopGen/DarwinVarSingle.h b/src/Bpp/PopGen/DarwinVarSingle.h
new file mode 100644
index 0000000..9e677d7
--- /dev/null
+++ b/src/Bpp/PopGen/DarwinVarSingle.h
@@ -0,0 +1,99 @@
+//
+// File DarwinVarSingle.h
+// Author : Sylvain Gaillard
+// Last modification : April 7, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 7, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _DARWIN_VAR_SINGLE_H_
+#define _DARWIN_VAR_SINGLE_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+
+// From local Pop
+#include "AbstractODataSet.h"
+
+namespace bpp
+{
+/**
+ * @brief The Darwin .var (single data) output format for popgenlib.
+ *
+ * Output-only format built on AbstractODataSet. As stated by
+ * getFormatDescription(), the file stores data for each marker in each
+ * individual, with one variable per allele.
+ *
+ * @author Sylvain Gaillard
+ */
+class DarwinVarSingle :
+ public virtual AbstractODataSet
+{
+private:
+ // Code written in every allele column of a locus whose genotype is missing
+ // for an individual (instead of the 0/1 presence flags).
+ size_t missingData_;
+
+public:
+ // Constructor and destructor
+ // missingData: code used for missing genotypes, 999 when omitted.
+ // NOTE(review): presumably stored in missingData_ -- the constructor body is
+ // not part of this header, confirm in DarwinVarSingle.cpp.
+ DarwinVarSingle(size_t missingData = 999);
+ ~DarwinVarSingle();
+
+public:
+ /**
+ * @name The ODataSet interface.
+ * @{
+ */
+ // Writes data_set to an already opened stream.
+ void write(std::ostream& os, const DataSet& data_set) const throw (Exception);
+ // Writes data_set to the file designated by path; overwrite is handed to
+ // AbstractODataSet::write, which defines its meaning.
+ void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The IOFormat interface
+ * @{
+ */
+ // Short, human-readable name of the format.
+ virtual const std::string getFormatName() const
+ {
+ return "Darwin .var single data";
+ }
+ // One-sentence description of what the format stores.
+ virtual const std::string getFormatDescription() const
+ {
+ return "Darwin .var file store data for each marker in each individual (1 variable per allele).";
+ }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _DARWIN_VAR_SINGLE_H_
+
diff --git a/src/Bpp/PopGen/DataSet.cpp b/src/Bpp/PopGen/DataSet.cpp
new file mode 100644
index 0000000..1d382b8
--- /dev/null
+++ b/src/Bpp/PopGen/DataSet.cpp
@@ -0,0 +1,1362 @@
+//
+// File DataSet.cpp
+// Author : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : November 10, 2008
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "DataSet.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Builds an empty data set: both analyzed-data pointers are null and the
+// locality and group containers start out empty.
+DataSet::DataSet() :
+  analyzedLoci_(0),
+  analyzedSequences_(0),
+  localities_(),
+  groups_()
+{}
+
+/******************************************************************************/
+
+// Copy constructor: allocates a fresh copy of the source's analyzed loci and
+// analyzed sequences (when it has them) and of each of its localities and
+// groups, in the same order.
+DataSet::DataSet(const DataSet& ds) :
+  analyzedLoci_(0),
+  analyzedSequences_(0),
+  localities_(),
+  groups_()
+{
+  if (ds.analyzedLoci_)
+    analyzedLoci_ = new AnalyzedLoci(*ds.analyzedLoci_);
+  if (ds.analyzedSequences_)
+    analyzedSequences_ = new AnalyzedSequences(*ds.analyzedSequences_);
+
+  // An empty source container simply yields zero iterations.
+  typedef vector<Locality<double>*>::const_iterator LocalityIt;
+  for (LocalityIt loc = ds.localities_.begin(); loc != ds.localities_.end(); ++loc)
+  {
+    localities_.push_back(new Locality<double>(**loc));
+  }
+
+  typedef vector<Group*>::const_iterator GroupIt;
+  for (GroupIt grp = ds.groups_.begin(); grp != ds.groups_.end(); ++grp)
+  {
+    groups_.push_back(new Group(**grp));
+  }
+}
+
+/******************************************************************************/
+
+// Assignment operator: makes this data set a deep copy of ds.
+//
+// Everything this object currently owns (the same objects the destructor
+// deletes) is released first, so nothing leaks and the copied localities and
+// groups replace the previous content instead of being appended to it.
+// Self-assignment is a no-op. Pointers are reset to null and the vectors
+// cleared before copying, so the object stays destructible even if an
+// allocation throws half-way through.
+//
+// NOTE(review): individuals may reference localities by address (see
+// setIndividualLocalityInGroupByName); whether copied groups then still point
+// at the localities of ds depends on Group/Individual -- confirm there.
+DataSet& DataSet::operator=(const DataSet& ds)
+{
+  if (this == &ds)
+    return *this;
+
+  // Release the current content.
+  delete analyzedLoci_;
+  analyzedLoci_ = 0;
+  delete analyzedSequences_;
+  analyzedSequences_ = 0;
+  for (size_t i = 0; i < localities_.size(); i++)
+  {
+    delete localities_[i];
+  }
+  localities_.clear();
+  for (size_t i = 0; i < groups_.size(); i++)
+  {
+    delete groups_[i];
+  }
+  groups_.clear();
+
+  // Deep-copy the content of ds.
+  if (ds.analyzedLoci_ != 0)
+    analyzedLoci_ = new AnalyzedLoci(*(ds.analyzedLoci_));
+  if (ds.analyzedSequences_ != 0)
+    analyzedSequences_ = new AnalyzedSequences(*(ds.analyzedSequences_));
+  for (size_t i = 0; i < ds.localities_.size(); i++)
+  {
+    localities_.push_back(new Locality<double>(*(ds.localities_[i])));
+  }
+  for (size_t i = 0; i < ds.groups_.size(); i++)
+  {
+    groups_.push_back(new Group(*(ds.groups_[i])));
+  }
+  return *this;
+}
+
+// ** Class destructor: *******************************************************/
+// Destructor: frees every object the data set owns.
+// Release order: groups, analyzed loci, localities, analyzed sequences.
+DataSet::~DataSet()
+{
+  const size_t nbGroups = getNumberOfGroups();
+  for (size_t g = 0; g < nbGroups; ++g)
+  {
+    delete groups_[g];
+  }
+
+  // Deleting a null pointer is a no-op, so no guard is needed.
+  delete analyzedLoci_;
+
+  const size_t nbLocalities = getNumberOfLocalities();
+  for (size_t l = 0; l < nbLocalities; ++l)
+  {
+    delete localities_[l];
+  }
+
+  delete analyzedSequences_;
+}
+
+// ** Other methods: **********************************************************/
+
+// Dealing with Localities ---------------------------------
+// Stores a copy of the given locality in the data set.
+// Throws BadIdentifierException when a stored locality already has that name.
+void DataSet::addLocality(Locality<double>& locality) throw (BadIdentifierException)
+{
+  typedef vector<Locality<double>*>::const_iterator LocalityIt;
+  for (LocalityIt known = localities_.begin(); known != localities_.end(); ++known)
+  {
+    if ((*known)->getName() == locality.getName())
+      throw BadIdentifierException("DataSet::addLocality: locality name already in use.", locality.getName());
+  }
+  // The data set keeps its own heap copy; the caller's object is left untouched.
+  localities_.push_back(new Locality<double>(locality));
+}
+
+/******************************************************************************/
+
+// Returns the index, in storage order, of the first locality called `name`.
+// Throws LocalityNotFoundException when no locality has that name.
+size_t DataSet::getLocalityPosition(const std::string& name) const throw (LocalityNotFoundException)
+{
+  size_t pos = 0;
+  while (pos < localities_.size() && !(localities_[pos]->getName() == name))
+  {
+    ++pos;
+  }
+  if (pos == localities_.size())
+    throw LocalityNotFoundException("DataSet::getLocalityPosition: Locality not found.", name);
+  return pos;
+}
+
+/******************************************************************************/
+
+// Returns the locality stored at index locality_position.
+// Throws IndexOutOfBoundsException when the index is not below the number of
+// stored localities.
+const Locality<double>& DataSet::getLocalityAtPosition(size_t locality_position) const throw (IndexOutOfBoundsException)
+{
+  if (locality_position < localities_.size())
+    return *localities_[locality_position];
+  throw IndexOutOfBoundsException("DataSet::getLocalityAtPosition: locality_position out of bounds.", locality_position, 0, localities_.size());
+}
+
+/******************************************************************************/
+
+// Returns the locality called `name`.
+// Throws LocalityNotFoundException (re-issued with this method's own message)
+// when no locality has that name.
+const Locality<double>& DataSet::getLocalityByName(const std::string& name) const throw (LocalityNotFoundException)
+{
+  size_t position = 0;
+  try
+  {
+    position = getLocalityPosition(name);
+  }
+  catch (LocalityNotFoundException&)
+  {
+    throw LocalityNotFoundException("DataSet::getLocalityByName: Locality not found.", name);
+  }
+  // The position was just produced by the lookup, so it is within bounds.
+  return getLocalityAtPosition(position);
+}
+
+/******************************************************************************/
+
+// Destroys the locality stored at index locality_position and removes its
+// slot, shifting the following localities down by one.
+// Throws IndexOutOfBoundsException when the index is out of range.
+void DataSet::deleteLocalityAtPosition(size_t locality_position) throw (IndexOutOfBoundsException)
+{
+  if (!(locality_position < localities_.size()))
+    throw IndexOutOfBoundsException("DataSet::deleteLocalityAtPosition: locality_position out of bounds.", locality_position, 0, localities_.size());
+
+  vector<Locality<double>*>::iterator victim = localities_.begin() + locality_position;
+  delete *victim;
+  localities_.erase(victim);
+}
+
+/******************************************************************************/
+
+// Destroys the locality called `name` and removes it from the data set.
+// Throws LocalityNotFoundException (re-issued with this method's own message)
+// when no locality has that name.
+void DataSet::deleteLocalityByName(const std::string& name) throw (LocalityNotFoundException)
+{
+  size_t position = 0;
+  try
+  {
+    position = getLocalityPosition(name);
+  }
+  catch (LocalityNotFoundException&)
+  {
+    throw LocalityNotFoundException("DataSet::deleteLocalityByName: Locality not found.", name);
+  }
+  // The position was just produced by the lookup, so it is within bounds.
+  deleteLocalityAtPosition(position);
+}
+
+/******************************************************************************/
+
+// Returns how many localities are currently stored in the data set.
+size_t DataSet::getNumberOfLocalities() const
+{
+ return localities_.size();
+}
+
+/******************************************************************************/
+
+// Tells whether at least one locality is stored in the data set.
+bool DataSet::hasLocality() const
+{
+  return getNumberOfLocalities() != 0;
+}
+
+/******************************************************************************/
+
+// Dealing with groups -------------------------------------
+// Appends a copy of `group` to the data set.
+// Throws BadIdentifierException when a stored group already uses the same id.
+void DataSet::addGroup(const Group& group) throw (BadIdentifierException)
+{
+  for (vector<Group*>::const_iterator known = groups_.begin(); known != groups_.end(); ++known)
+  {
+    if (group.getGroupId() == (*known)->getGroupId())
+      throw BadIdentifierException("DataSet::addGroup: group id already in use.", group.getGroupId());
+  }
+  // The data set keeps its own heap copy of the group.
+  groups_.push_back(new Group(group));
+}
+
+/******************************************************************************/
+
+// Appends a new group constructed from group_id alone.
+// Throws BadIdentifierException when a stored group already uses that id.
+void DataSet::addEmptyGroup(size_t group_id) throw (BadIdentifierException)
+{
+  for (vector<Group*>::const_iterator known = groups_.begin(); known != groups_.end(); ++known)
+  {
+    if (group_id == (*known)->getGroupId())
+      throw BadIdentifierException("DataSet::addEmptyGroup: group_id already in use.", group_id);
+  }
+  groups_.push_back(new Group(group_id));
+}
+
+/******************************************************************************/
+
+// Returns the first stored group whose id equals group_id.
+// Throws GroupNotFoundException when no group has that id.
+const Group& DataSet::getGroupById(size_t group_id) const throw (GroupNotFoundException)
+{
+  vector<Group*>::const_iterator it = groups_.begin();
+  while (it != groups_.end() && !(group_id == (*it)->getGroupId()))
+  {
+    ++it;
+  }
+  if (it == groups_.end())
+    throw GroupNotFoundException("DataSet::getGroupById: group_id not found.", group_id);
+  return **it;
+}
+
+/******************************************************************************/
+
+// Returns the display name of the group identified by group_id: the group's
+// own name when it is not empty, otherwise the id converted to text.
+// Throws GroupNotFoundException when no group has that id; the exception
+// comes straight from getGroupById(), so no extra throw is needed here.
+string DataSet::getGroupName(size_t group_id) const throw (GroupNotFoundException)
+{
+  const string name = getGroupById(group_id).getGroupName();
+  if (name.empty())
+    return TextTools::toString(group_id);
+  return name;
+}
+
+/******************************************************************************/
+
+// Renames the first stored group whose id equals group_id.
+// The method is const: it changes the pointed-to Group, not the container.
+// Throws GroupNotFoundException when no group has that id.
+void DataSet::setGroupName(size_t group_id, const std::string& group_name) const throw (GroupNotFoundException)
+{
+  for (vector<Group*>::const_iterator it = groups_.begin(); it != groups_.end(); ++it)
+  {
+    if (!(group_id == (*it)->getGroupId()))
+      continue;
+    (*it)->setGroupName(group_name);
+    return;
+  }
+  throw GroupNotFoundException("DataSet::setGroupName: group_id not found.", group_id);
+}
+
+/******************************************************************************/
+
+// Returns the index, in storage order, of the first group whose id equals
+// group_id. Throws GroupNotFoundException when no group has that id.
+size_t DataSet::getGroupPosition(size_t group_id) const throw (GroupNotFoundException)
+{
+  size_t pos = 0;
+  while (pos < groups_.size() && !(group_id == groups_[pos]->getGroupId()))
+  {
+    ++pos;
+  }
+  if (pos == groups_.size())
+    throw GroupNotFoundException("DataSet::getGroupPosition: group_id not found.", group_id);
+  return pos;
+}
+
+/******************************************************************************/
+
+// Returns the group stored at index group_position.
+// Throws IndexOutOfBoundsException when the index is out of range.
+const Group& DataSet::getGroupAtPosition(size_t group_position) const throw (IndexOutOfBoundsException)
+{
+  if (group_position < groups_.size())
+    return *groups_[group_position];
+  throw IndexOutOfBoundsException("DataSet::getGroup.", group_position, 0, groups_.size());
+}
+
+/******************************************************************************/
+
+// Destroys the group stored at index group_position and removes its slot,
+// shifting the following groups down by one.
+// Throws IndexOutOfBoundsException when the index is out of range.
+void DataSet::deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoundsException)
+{
+  if (!(group_position < groups_.size()))
+    throw IndexOutOfBoundsException("DataSet::deleteGroup.", group_position, 0, groups_.size());
+
+  vector<Group*>::iterator victim = groups_.begin() + group_position;
+  delete *victim;
+  groups_.erase(victim);
+}
+
+/******************************************************************************/
+
+// Returns how many groups are currently stored in the data set.
+size_t DataSet::getNumberOfGroups() const
+{
+ return groups_.size();
+}
+
+/******************************************************************************/
+
+// Moves the content of the group identified by source_id into the group
+// identified by target_id, then deletes the source group.
+//
+// Both ids are resolved before anything is modified, so an unknown id leaves
+// the data set untouched and raises GroupNotFoundException.
+// Merging a group with itself is a no-op: without this guard the group's
+// individuals would be re-added to the group itself and the group would then
+// be deleted.
+//
+// NOTE(review): Group::addIndividual can apparently raise
+// BadIdentifierException on a duplicate individual id (see
+// addIndividualToGroup); that type is not in this method's exception
+// specification -- confirm the two groups cannot share ids.
+void DataSet::mergeTwoGroups(size_t source_id, size_t target_id) throw (GroupNotFoundException)
+{
+  // Test the existence of the two groups and get their positions.
+  size_t source_pos = 0;
+  try
+  {
+    source_pos = getGroupPosition(source_id);
+  }
+  catch (GroupNotFoundException&)
+  {
+    throw GroupNotFoundException("DataSet::mergeTwoGroups: source_id not found.", source_id);
+  }
+  size_t target_pos = 0;
+  try
+  {
+    target_pos = getGroupPosition(target_id);
+  }
+  catch (GroupNotFoundException&)
+  {
+    throw GroupNotFoundException("DataSet::mergeTwoGroups: target_id not found.", target_id);
+  }
+
+  // Source and target are the same group: nothing to merge.
+  if (source_pos == target_pos)
+    return;
+
+  // Empty the source into the target.
+  for (size_t i = 0; i < groups_[source_pos]->getNumberOfIndividuals(); i++)
+  {
+    groups_[target_pos]->addIndividual(groups_[source_pos]->getIndividualAtPosition(i));
+  }
+  deleteGroupAtPosition(source_pos);
+}
+
+/******************************************************************************/
+
+// Merges every group listed in group_ids into the listed group with the
+// smallest id, deleting the other listed groups.
+//
+// - group_ids is sorted in place (the caller's vector is reordered).
+// - An empty list is a no-op instead of indexing group_ids[0].
+// - All ids are validated before anything is modified; an unknown id raises
+//   GroupNotFoundException.
+// - An id listed twice is merged once; its second occurrence is skipped
+//   instead of failing on the already deleted group.
+// - The target position is looked up again on every pass, because deleting a
+//   group shifts the positions of the groups stored after it.
+void DataSet::mergeGroups(std::vector<size_t>& group_ids) throw (GroupNotFoundException)
+{
+  if (group_ids.empty())
+    return;
+
+  // Test if all group ids exist in the DataSet.
+  for (size_t i = 0; i < group_ids.size(); i++)
+  {
+    try
+    {
+      getGroupById(group_ids[i]);
+    }
+    catch (GroupNotFoundException&)
+    {
+      throw GroupNotFoundException("DataSet::mergeGroups: group not found.", group_ids[i]);
+    }
+  }
+
+  // Sort the group ids: the smallest one designates the target group.
+  sort(group_ids.begin(), group_ids.end());
+  const size_t target_id = group_ids[0];
+
+  // Merge all the other groups into the target.
+  for (size_t i = 1; i < group_ids.size(); i++)
+  {
+    // After sorting, duplicates are adjacent; this group is already merged.
+    if (group_ids[i] == group_ids[i - 1])
+      continue;
+
+    const size_t pos_current = getGroupPosition(group_ids[i]);
+    const size_t pos_target = getGroupPosition(target_id);
+    const Group& current = getGroupAtPosition(pos_current);
+    for (size_t j = 0; j < current.getNumberOfIndividuals(); j++)
+    {
+      groups_[pos_target]->addIndividual(current.getIndividualAtPosition(j));
+    }
+    deleteGroupAtPosition(pos_current);
+  }
+}
+
+/******************************************************************************/
+
+// Moves the individuals found at the positions listed in individuals_selection
+// out of the group identified by group_id and into a newly created group,
+// which is then added to the data set.
+// Throws GroupNotFoundException when group_id is unknown, and
+// IndexOutOfBoundsException when a selected position is not below the number
+// of individuals in the source group.
+void DataSet::splitGroup(size_t group_id, std::vector<size_t> individuals_selection) throw (Exception)
+{
+ size_t source_pos;
+ try
+ {
+ source_pos = getGroupPosition(group_id);
+ }
+ catch (GroupNotFoundException& gnfe)
+ {
+ throw GroupNotFoundException("DataSet::splitGroup: group_id not found.", gnfe.getIdentifier());
+ }
+ // The new group gets the largest id currently in use plus one.
+ size_t new_group_id = 0;
+ for (size_t i = 0; i < groups_.size(); i++)
+ {
+ if (groups_[i]->getGroupId() > new_group_id)
+ new_group_id = groups_[i]->getGroupId();
+ }
+ new_group_id++;
+ Group new_group(new_group_id);
+ // Validate the whole selection before touching the source group.
+ for (size_t i = 0; i < individuals_selection.size(); i++)
+ {
+ if (individuals_selection[i] >= groups_[source_pos]->getNumberOfIndividuals())
+ throw IndexOutOfBoundsException("DataSet::splitGroup: individuals_selection excedes the number of individual in the group.", individuals_selection[i], 0, groups_[source_pos]->getNumberOfIndividuals());
+ }
+ // NOTE(review): each pass calls removeIndividualAtPosition() and then
+ // deleteIndividualAtPosition() with the same index. If the first call already
+ // detaches the individual from the group, the second one drops a different
+ // individual, and the removed object may never be freed once it has been
+ // copied into new_group -- verify against Group.
+ // NOTE(review): positions were validated against the initial group size; if
+ // removals shift the remaining individuals, later selected positions no
+ // longer designate the intended individuals (or become out of range) --
+ // TODO confirm.
+ for (size_t i = 0; i < individuals_selection.size(); i++)
+ {
+ new_group.addIndividual(*groups_[source_pos]->removeIndividualAtPosition(individuals_selection[i]));
+ groups_[source_pos]->deleteIndividualAtPosition(individuals_selection[i]);
+ }
+ // addGroup() stores a copy of new_group.
+ addGroup(new_group);
+}
+
+/******************************************************************************/
+
+// Dealing with individuals -------------------------------
+
+// Adds `individual` to the group stored at index `group` (a position in the
+// data set, not a group id). When the individual carries sequences, its
+// sequence alphabet is also passed to setAlphabet().
+// Throws IndexOutOfBoundsException for a bad group index, and
+// BadIdentifierException (re-issued with this method's own message) when one
+// is raised while adding.
+void DataSet::addIndividualToGroup(size_t group, const Individual& individual) throw (Exception)
+{
+  if (!(group < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::addIndividualToGroup: group out of bounds.", group, 0, getNumberOfGroups());
+
+  try
+  {
+    Group* const target = groups_[group];
+    target->addIndividual(individual);
+    if (individual.hasSequences())
+    {
+      setAlphabet(individual.getSequenceAlphabet());
+    }
+  }
+  catch (BadIdentifierException& duplicate)
+  {
+    throw BadIdentifierException("DataSet::addIndividualToGroup: individual's id already in use in this group.", duplicate.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Asks the group stored at index `group` to create an empty individual
+// identified by individual_id.
+// Throws IndexOutOfBoundsException for a bad group index, and
+// BadIdentifierException (re-issued with this method's own message) when the
+// group rejects the id.
+void DataSet::addEmptyIndividualToGroup(size_t group, const std::string& individual_id) throw (Exception)
+{
+  if (!(group < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::addEmptyIndividual: group out of bounds.", group, 0, getNumberOfGroups());
+
+  Group* const target = groups_[group];
+  try
+  {
+    target->addEmptyIndividual(individual_id);
+  }
+  catch (BadIdentifierException& duplicate)
+  {
+    throw BadIdentifierException("DataSet::addEmptyIndividual: individual_id already in use.", duplicate.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Returns the number of individuals held by the group at index group_position.
+// Throws IndexOutOfBoundsException when the group index is out of range.
+size_t DataSet::getNumberOfIndividualsInGroup(size_t group_position) const throw (IndexOutOfBoundsException)
+{
+  if (group_position < getNumberOfGroups())
+    return groups_[group_position]->getNumberOfIndividuals();
+  throw IndexOutOfBoundsException("DataSet::getNumberOfIndividualsInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+}
+
+/******************************************************************************/
+
+// Returns the position, inside the group at index group_position, of the
+// individual identified by individual_id.
+// Throws IndexOutOfBoundsException for a bad group index and
+// IndividualNotFoundException (re-issued with this method's own message) when
+// the group does not know the id.
+size_t DataSet::getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return groups_[group_position]->getIndividualPosition(individual_id);
+  }
+  // Caught by reference, like the other handlers of this file: no copy of the
+  // exception object and no slicing.
+  catch (IndividualNotFoundException& infe)
+  {
+    throw IndividualNotFoundException("DataSet::getIndividualPositionFromGroup: individual_id not found.", infe.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Returns a pointer to the individual stored at individual_position in the
+// group at index group_position. The pointer is the address of the reference
+// returned by the group; nothing is copied here.
+// Throws IndexOutOfBoundsException when either index is out of range.
+const Individual* DataSet::getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualAtPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return &groups_[group_position]->getIndividualAtPosition(individual_position);
+  }
+  // Caught by reference, like the other handlers of this file: no copy of the
+  // exception object and no slicing.
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualAtPositionFromGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Returns a pointer to the individual identified by individual_id in the
+// group at index group_position. The pointer is the address of the reference
+// returned by the group; nothing is copied here.
+// Throws IndexOutOfBoundsException for a bad group index and
+// IndividualNotFoundException (re-issued with this method's own message) when
+// the group does not know the id.
+const Individual* DataSet::getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualByIdFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return &groups_[group_position]->getIndividualById(individual_id);
+  }
+  // Caught by reference, like the other handlers of this file: no copy of the
+  // exception object and no slicing.
+  catch (IndividualNotFoundException& infe)
+  {
+    throw IndividualNotFoundException("DataSet::getIndividualByIdFromGroup: individual_id not found.", infe.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Asks the group at index group_position to delete the individual stored at
+// individual_position.
+// Throws IndexOutOfBoundsException when either index is out of range.
+void DataSet::deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualAtPositionFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    groups_[group_position]->deleteIndividualAtPosition(individual_position);
+  }
+  // Caught by reference, like the other handlers of this file: no copy of the
+  // exception object and no slicing.
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualAtPositionFromGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Asks the group at index group_position to delete the individual identified
+// by individual_id.
+// Throws IndexOutOfBoundsException for a bad group index and
+// IndividualNotFoundException (re-issued with this method's own message) when
+// the group does not know the id.
+void DataSet::deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualByIdFromGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    groups_[group_position]->deleteIndividualById(individual_id);
+  }
+  // Caught by reference, like the other handlers of this file: no copy of the
+  // exception object and no slicing.
+  catch (IndividualNotFoundException& infe)
+  {
+    throw IndividualNotFoundException("DataSet::deleteIndividualByIdFromGroup: individual_id not found.", infe.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Sets the sex of the individual at individual_position in the group at
+// index group_position; the value is handed to the group as is.
+// Throws IndexOutOfBoundsException when either index is out of range.
+void DataSet::setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::setIndividualSexInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  Group* const target = groups_[group_position];
+  try
+  {
+    target->setIndividualSexAtPosition(individual_position, sex);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::setIndividualSexInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Returns the sex recorded for the individual at individual_position in the
+// group at index group_position.
+// Throws IndexOutOfBoundsException when either index is out of range.
+unsigned short DataSet::getIndividualSexInGroup(size_t group_position, size_t individual_position) const throw (IndexOutOfBoundsException)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::getIndividualSexInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  Group* const source = groups_[group_position];
+  try
+  {
+    return source->getIndividualSexAtPosition(individual_position);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualSexInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Sets the date of the individual at individual_position in the group at
+// index group_position.
+// Throws IndexOutOfBoundsException when either index is out of range.
+void DataSet::setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date) throw (IndexOutOfBoundsException)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::setIndividualDateInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  Group* const target = groups_[group_position];
+  try
+  {
+    target->setIndividualDateAtPosition(individual_position, date);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::setIndividualDateInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Returns a pointer to the date of the individual at individual_position in
+// the group at index group_position (address of the reference returned by
+// the group).
+// Throws IndexOutOfBoundsException when either index is out of range, and
+// NullPointerException when the group reports that the individual has no date.
+const Date* DataSet::getIndividualDateInGroup(size_t group_position, size_t individual_position) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualDateInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return &groups_[group_position]->getIndividualDateAtPosition(individual_position);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualDateInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  // Caught by reference (was by value): no copy of the exception object.
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualDateInGroup: individual has no date.");
+  }
+}
+
+/******************************************************************************/
+
+// Sets the coordinates of the individual at individual_position in the group
+// at index group_position.
+// Throws IndexOutOfBoundsException when either index is out of range.
+void DataSet::setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D<double>& coord) throw (IndexOutOfBoundsException)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::setIndividualCoordInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  Group* const target = groups_[group_position];
+  try
+  {
+    target->setIndividualCoordAtPosition(individual_position, coord);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::setIndividualCoordInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Returns a pointer to the coordinates of the individual at
+// individual_position in the group at index group_position (address of the
+// reference returned by the group).
+// Throws IndexOutOfBoundsException when either index is out of range, and
+// NullPointerException when the group reports that the individual has no
+// coordinate.
+const Point2D<double>* DataSet::getIndividualCoordInGroup(size_t group_position, size_t individual_position) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualCoordInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return &groups_[group_position]->getIndividualCoordAtPosition(individual_position);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    // The message names this method, like the two other messages it emits.
+    throw IndexOutOfBoundsException("DataSet::getIndividualCoordInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  // Caught by reference (was by value): no copy of the exception object.
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualCoordInGroup: individual has no coordinate.");
+  }
+}
+
+/******************************************************************************/
+
+// Attaches the locality called locality_name to the individual at
+// individual_position in the group at index group_position. The group
+// receives the address of the locality object stored in this data set.
+// Throws IndexOutOfBoundsException when either index is out of range and
+// LocalityNotFoundException when no locality has that name.
+void DataSet::setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name) throw (Exception)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::setIndividualLocalityInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  try
+  {
+    Group* const target = groups_[group_position];
+    target->setIndividualLocalityAtPosition(individual_position, &getLocalityByName(locality_name));
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::setIndividualLocalityInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+  catch (LocalityNotFoundException& unknownLocality)
+  {
+    throw LocalityNotFoundException("DataSet::setIndividualLocalityInGroup: locality_name not found.", unknownLocality.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Returns a pointer to the locality of the individual at individual_position
+// in the group at index group_position (address of the reference returned by
+// the group).
+// Throws IndexOutOfBoundsException when either index is out of range, and
+// NullPointerException when the group reports that the individual has no
+// locality.
+const Locality<double>* DataSet::getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualLocalityInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return &groups_[group_position]->getIndividualLocalityAtPosition(individual_position);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualLocalityInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  // Caught by reference (was by value): no copy of the exception object.
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualLocalityInGroup: individual has no locality.");
+  }
+}
+
+/******************************************************************************/
+
+// Adds `sequence` at sequence_position to the individual at
+// individual_position in the group at index group_position, then passes the
+// sequence's alphabet to setAlphabet().
+// Throws IndexOutOfBoundsException for a bad group index. Exceptions raised
+// while adding (bad individual index, alphabet mismatch, duplicate sequence
+// name, sequence position already used) are re-issued with this method's own
+// messages.
+void DataSet::addIndividualSequenceInGroup(size_t group_position, size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception)
+{
+  if (!(group_position < getNumberOfGroups()))
+    throw IndexOutOfBoundsException("DataSet::addIndividualSequenceInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+
+  try
+  {
+    Group* const target = groups_[group_position];
+    target->addIndividualSequenceAtPosition(individual_position, sequence_position, sequence);
+    setAlphabet(sequence.getAlphabet());
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("DataSet::addIndividualSequenceInGroup: individual_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+  catch (AlphabetMismatchException& mismatch)
+  {
+    throw AlphabetMismatchException("DataSet::addIndividualSequenceInGroup: sequence's alphabet doesn't match.", mismatch.getAlphabets()[0], mismatch.getAlphabets()[1]);
+  }
+  catch (BadIdentifierException& duplicateName)
+  {
+    throw BadIdentifierException("DataSet::addIndividualSequenceInGroup: sequence's name already in use.", duplicateName.getIdentifier());
+  }
+  catch (BadIntegerException& usedPosition)
+  {
+    throw BadIntegerException("DataSet::addIndividualSequenceInGroup: sequence_position already in use.", usedPosition.getBadInteger());
+  }
+}
+
+/******************************************************************************/
+
+// Returns the sequence called sequence_name of the individual at
+// individual_position in the group at index group_position.
+// Throws IndexOutOfBoundsException when either index is out of range,
+// NullPointerException when the group reports that the individual has no
+// sequences, and SequenceNotFoundException when the name is unknown.
+const Sequence& DataSet::getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequenceByNameInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return groups_[group_position]->getIndividualSequenceByName(individual_position, sequence_name);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequenceByNameInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  // Caught by reference (was by value): no copy of the exception object.
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualSequenceByNameInGroup: individual has no sequences.");
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("DataSet::getIndividualSequenceByNameInGroup: sequence_name not found.", snfe.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// Returns the sequence stored at sequence_position for the individual at
+// individual_position in the group at index group_position.
+// Throws IndexOutOfBoundsException when the group, individual or sequence
+// index is out of range, and NullPointerException when the group reports
+// that the individual has no sequences.
+const Sequence& DataSet::getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const throw (Exception)
+{
+  if (group_position >= getNumberOfGroups())
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: group_position out of bounds.", group_position, 0, getNumberOfGroups());
+  try
+  {
+    return groups_[group_position]->getIndividualSequenceAtPosition(individual_position, sequence_position);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    // The same exception type is received for both indices; the text of the
+    // incoming message decides which index is blamed. Any message that does
+    // not mention "individual_position" is reported as a bad sequence_position.
+    const string reason(ioobe.what());
+    if (reason.find("individual_position") != string::npos)
+      throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: individual_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequenceAtPositionInGroup: sequence_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  // Caught by reference (was by value): no copy of the exception object.
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualSequenceAtPositionInGroup: individual has no sequences.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Delete a sequence of an individual of a group, addressed by sequence name.
+ *
+ * group_position is checked against getNumberOfGroups(); the deletion itself
+ * is delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no sequences").
+ * @throw SequenceNotFoundException if the Group raises one.
+ */
+void DataSet::deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceByNameInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->deleteIndividualSequenceByName(individual_position, sequence_name);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceByNameInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::deleteIndividualSequenceByNameInGroup: individual has no sequences.");
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    throw SequenceNotFoundException("DataSet::deleteIndividualSequenceByNameInGroup: sequence_name not found.", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Delete a sequence of an individual of a group, addressed by position.
+ *
+ * group_position is checked against getNumberOfGroups(); the deletion itself
+ * is delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one. In the latter case the error is reported as
+ *        individual_position when the caught message mentions that word and
+ *        as sequence_position otherwise.
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no sequences").
+ */
+void DataSet::deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->deleteIndividualSequenceAtPosition(individual_position, sequence_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    // The offending argument is inferred from the text of the caught message.
+    const std::string detail(oob.what());
+    const bool individualIsBad = (detail.find("individual_position") != std::string::npos);
+    if (individualIsBad)
+      throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualSequenceAtPositionInGroup: sequence_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::deleteIndividualSequenceAtPositionInGroup: individual has no sequences.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the sequence names of an individual of a group.
+ *
+ * group_position is checked against getNumberOfGroups(); the names are
+ * obtained from the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no sequences").
+ */
+std::vector<std::string> DataSet::getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequencesNamesInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    return grp->getIndividualSequencesNames(individual_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequencesNamesInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualSequencesNamesInGroup: individual has no sequences.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the position of a named sequence for an individual of a group.
+ *
+ * group_position is checked against getNumberOfGroups(); the lookup is
+ * delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no sequences").
+ * @throw SequenceNotFoundException if the Group raises one.
+ */
+size_t DataSet::getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequencePositionInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    return grp->getIndividualSequencePosition(individual_position, sequence_name);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualSequencePositionInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualSequencePositionInGroup: individual has no sequences.");
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    throw SequenceNotFoundException("DataSet::getIndividualSequencePositionInGroup: sequence_name not found.", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the number of sequences of an individual of a group.
+ *
+ * group_position is checked against getNumberOfGroups(); the count is
+ * obtained from the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no sequences").
+ */
+size_t DataSet::getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::getIndividualNumberOfSequencesInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    return grp->getIndividualNumberOfSequences(individual_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::getIndividualNumberOfSequencesInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualNumberOfSequencesInGroup: individual has no sequences.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Assign a multilocus genotype to an individual of a group.
+ *
+ * group_position is checked against getNumberOfGroups(); the assignment is
+ * delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ */
+void DataSet::setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::setIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->setIndividualGenotype(individual_position, genotype);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::setIndividualGenotypeInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Initialise the genotype of an individual of a group, sized with
+ * getNumberOfLoci().
+ *
+ * group_position is checked against getNumberOfGroups(); the initialisation
+ * is delegated to the Group stored at that position. getNumberOfLoci() is
+ * evaluated inside the try block, so the NullPointerException it throws
+ * when there is no AnalyzedLoci is translated like the others.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the delegated call raises one (reported as individual_position).
+ * @throw BadIntegerException if the delegated call raises one (reported as
+ *        "number of loci must be > 0").
+ * @throw NullPointerException if one is raised inside the try block
+ *        (reported as "analyzed_loci is NULL").
+ * @throw Exception for any other Exception raised inside the try block
+ *        (reported as "individual already has a genotype").
+ */
+void DataSet::initIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::initIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->initIndividualGenotype(individual_position, getNumberOfLoci());
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::initIndividualGenotypeInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (BadIntegerException& badInt)
+  {
+    throw BadIntegerException("DataSet::initIndividualGenotypeInGroup: number of loci must be > 0.", badInt.getBadInteger());
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::initIndividualGenotypeInGroup: analyzed_loci is NULL.");
+  }
+  catch (const Exception&)
+  {
+    // Catch-all: every remaining Exception is reported as this condition.
+    throw Exception("DataSet::initIndividualGenotypeInGroup: individual already has a genotype.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Delete the genotype of an individual of a group.
+ *
+ * group_position is checked against getNumberOfGroups(); the deletion is
+ * delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one (reported as individual_position).
+ */
+void DataSet::deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position) throw (IndexOutOfBoundsException)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualGenotypeInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->deleteIndividualGenotype(individual_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::deleteIndividualGenotypeInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Set the monolocus genotype of an individual of a group at a given locus.
+ *
+ * group_position is checked against getNumberOfGroups(); the assignment is
+ * delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one. In the latter case the error is reported as
+ *        individual_position when the caught message mentions that word and
+ *        as locus_position otherwise.
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no genotype").
+ */
+void DataSet::setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->setIndividualMonolocusGenotype(individual_position, locus_position, monogen);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    // The offending argument is inferred from the text of the caught message.
+    const std::string detail(oob.what());
+    if (detail.find("individual_position") == std::string::npos)
+      throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: locus_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::setIndividualMonolocusGenotypeInGroup: individual has no genotype.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Set the monolocus genotype of an individual of a group at a given locus
+ * from a list of allele keys.
+ *
+ * group_position is checked against getNumberOfGroups(); the assignment is
+ * delegated to the Group stored at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one. In the latter case the error is reported as
+ *        individual_position when the caught message mentions that word and
+ *        as locus_position otherwise.
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no genotype").
+ * @throw Exception for any other Exception raised by the Group (reported as
+ *        "no key in allele_keys").
+ */
+void DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<size_t> allele_keys) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->setIndividualMonolocusGenotypeByAlleleKey(individual_position, locus_position, allele_keys);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    // The offending argument is inferred from the text of the caught message.
+    const std::string detail(oob.what());
+    const bool individualIsBad = (detail.find("individual_position") != std::string::npos);
+    if (individualIsBad)
+      throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: locus_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: individual has no genotype.");
+  }
+  catch (const Exception&)
+  {
+    // Catch-all: every remaining Exception is reported as this condition.
+    throw Exception("DataSet::setIndividualMonolocusGenotypeByAlleleKeyInGroup: no key in allele_keys.");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Set the monolocus genotype of an individual of a group at a given locus
+ * from a list of allele identifiers.
+ *
+ * group_position is checked against getNumberOfGroups(). The LocusInfo for
+ * locus_position is then fetched with getLocusInfoAtPosition() outside the
+ * try block, so its exceptions propagate with their own messages. Finally
+ * the assignment is delegated to the Group stored at group_position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one. In the latter case the error is reported as
+ *        individual_position when the caught message mentions that word and
+ *        as locus_position otherwise.
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no genotype").
+ * @throw AlleleNotFoundException if the Group raises one.
+ */
+void DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<std::string> allele_id) throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  const LocusInfo& locus_info = getLocusInfoAtPosition(locus_position);
+  Group* const grp = groups_[group_position];
+  try
+  {
+    grp->setIndividualMonolocusGenotypeByAlleleId(individual_position, locus_position, allele_id, locus_info);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    // The offending argument is inferred from the text of the caught message.
+    const std::string detail(oob.what());
+    if (detail.find("individual_position") == std::string::npos)
+      throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: locus_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: individual has no genotype.");
+  }
+  catch (AlleleNotFoundException& missingAllele)
+  {
+    throw AlleleNotFoundException("DataSet::setIndividualMonolocusGenotypeByAlleleIdInGroup: id not found.", missingAllele.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return a pointer to the monolocus genotype of an individual of a group at
+ * a given locus.
+ *
+ * group_position is checked against getNumberOfGroups(); the returned
+ * pointer is the address of the reference handed back by the Group stored
+ * at that position.
+ *
+ * @throw IndexOutOfBoundsException if group_position is out of range, or if
+ *        the Group raises one. In the latter case the error is reported as
+ *        individual_position when the caught message mentions that word and
+ *        as locus_position otherwise.
+ * @throw NullPointerException if the Group raises one (reported as
+ *        "individual has no genotype").
+ */
+const MonolocusGenotype* DataSet::getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const throw (Exception)
+{
+  const size_t groupCount = getNumberOfGroups();
+  if (group_position >= groupCount)
+    throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: group_position out of bounds.", group_position, 0, groupCount);
+
+  Group* const grp = groups_[group_position];
+  try
+  {
+    return &grp->getIndividualMonolocusGenotype(individual_position, locus_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    // The offending argument is inferred from the text of the caught message.
+    const std::string detail(oob.what());
+    const bool individualIsBad = (detail.find("individual_position") != std::string::npos);
+    if (individualIsBad)
+      throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: individual_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+    throw IndexOutOfBoundsException("DataSet::getIndividualMonolocusGenotypeInGroup: locus_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getIndividualMonolocusGenotypeInGroup: individual has no genotype.");
+  }
+}
+
+/******************************************************************************/
+
+// Dealing with AnalyzedSequences --------------------------
+
+/**
+ * Set the alphabet from an Alphabet pointer.
+ *
+ * An AnalyzedSequences object is created first when the DataSet does not
+ * hold one yet; the alphabet is then forwarded to it.
+ */
+void DataSet::setAlphabet(const Alphabet* alpha)
+{
+  if (!analyzedSequences_)
+  {
+    analyzedSequences_ = new AnalyzedSequences();
+  }
+  analyzedSequences_->setAlphabet(alpha);
+}
+
+/******************************************************************************/
+
+/**
+ * Set the alphabet from a type name.
+ *
+ * An AnalyzedSequences object is created first when the DataSet does not
+ * hold one yet; the type name is then forwarded to it.
+ */
+void DataSet::setAlphabet(const std::string& alpha_type)
+{
+  if (!analyzedSequences_)
+  {
+    analyzedSequences_ = new AnalyzedSequences();
+  }
+  analyzedSequences_->setAlphabet(alpha_type);
+}
+
+/******************************************************************************/
+
+/**
+ * Return the alphabet held by the AnalyzedSequences object.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedSequences.
+ */
+const Alphabet* DataSet::getAlphabet() const throw (NullPointerException)
+{
+  if (analyzedSequences_ == 0)
+  {
+    throw NullPointerException("DataSet::getAlphabet: no sequence data.");
+  }
+  return analyzedSequences_->getAlphabet();
+}
+
+/******************************************************************************/
+
+/**
+ * Return the alphabet type reported by the AnalyzedSequences object.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedSequences.
+ */
+std::string DataSet::getAlphabetType() const throw (NullPointerException)
+{
+  if (analyzedSequences_ == 0)
+  {
+    throw NullPointerException("DataSet::getAlphabetType: no sequence data.");
+  }
+  return analyzedSequences_->getAlphabetType();
+}
+
+/******************************************************************************/
+
+// Dealing with AnalyzedLoci -------------------------------
+
+/**
+ * Replace the AnalyzedLoci of the DataSet with a copy of the given one.
+ *
+ * The copy is made before the current object is released. This keeps the
+ * call safe when analyzedLoci refers to the object currently owned by the
+ * DataSet, and leaves the DataSet unchanged if the copy throws.
+ *
+ * @param analyzedLoci The AnalyzedLoci to copy.
+ * @throw Exception if deleteAnalyzedLoci() raises an Exception; the freshly
+ *        made copy is released before re-throwing.
+ */
+void DataSet::setAnalyzedLoci(const AnalyzedLoci& analyzedLoci) throw (Exception)
+{
+  AnalyzedLoci* replacement = new AnalyzedLoci(analyzedLoci);
+  if (analyzedLoci_ != 0)
+  {
+    try
+    {
+      deleteAnalyzedLoci();
+    }
+    catch (const Exception&)
+    {
+      delete replacement;
+      throw Exception ("DataSet::setAnalyzedLoci: at least one individual has a genotype of the actual AnalyzedLoci.");
+    }
+  }
+  analyzedLoci_ = replacement;
+}
+
+/******************************************************************************/
+
+/**
+ * Create the AnalyzedLoci of the DataSet for a given number of loci.
+ *
+ * @param number_of_loci Passed to the AnalyzedLoci constructor.
+ * @throw Exception if the DataSet already holds an AnalyzedLoci.
+ */
+void DataSet::initAnalyzedLoci(size_t number_of_loci) throw (Exception)
+{
+  const bool alreadySet = (analyzedLoci_ != 0);
+  if (alreadySet)
+  {
+    throw Exception("DataSet::initAnalyzedLoci: analyzedLoci_ already initialyzed.");
+  }
+  analyzedLoci_ = new AnalyzedLoci(number_of_loci);
+}
+
+/******************************************************************************/
+
+/**
+ * Return the AnalyzedLoci held by the DataSet.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ */
+const AnalyzedLoci* DataSet::getAnalyzedLoci() const throw (NullPointerException)
+{
+  if (analyzedLoci_ == 0)
+  {
+    throw NullPointerException("DataSet::getAnalyzedLoci: no loci initialized.");
+  }
+  return analyzedLoci_;
+}
+
+/******************************************************************************/
+
+/**
+ * Release the AnalyzedLoci held by the DataSet, if any.
+ *
+ * The member is reset to null afterwards so that the "!= 0" / "== 0" tests
+ * used throughout the class see that there is no AnalyzedLoci any more, and
+ * so that a second call (or a later delete of the member) is harmless.
+ */
+void DataSet::deleteAnalyzedLoci()
+{
+  // Deleting a null pointer is a no-op, so no guard is needed.
+  delete analyzedLoci_;
+  analyzedLoci_ = 0;
+}
+
+/******************************************************************************/
+
+/**
+ * Set the LocusInfo stored at a given position of the AnalyzedLoci.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw IndexOutOfBoundsException if the AnalyzedLoci raises one; the
+ *        re-thrown exception carries locus_position and the bounds
+ *        0 .. getNumberOfLoci() of the AnalyzedLoci.
+ */
+void DataSet::setLocusInfo(size_t locus_position, const LocusInfo& locus) throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::setLocusInfo: there's no AnalyzedLoci to setup.");
+  }
+
+  try
+  {
+    analyzedLoci_->setLocusInfo(locus_position, locus);
+  }
+  catch (const IndexOutOfBoundsException&)
+  {
+    throw IndexOutOfBoundsException("DataSet::setLocusInfo: locus_position out of bounds.", locus_position, 0, analyzedLoci_->getNumberOfLoci());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the LocusInfo with the given name from the AnalyzedLoci.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw LocusNotFoundException if the AnalyzedLoci raises one; the
+ *        re-thrown exception carries locus_name.
+ */
+const LocusInfo& DataSet::getLocusInfoByName(const std::string& locus_name) const throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::getLocusInfoByName: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    return analyzedLoci_->getLocusInfoByName(locus_name);
+  }
+  catch (const LocusNotFoundException&)
+  {
+    throw LocusNotFoundException("DataSet::getLocusInfoByName: locus_name not found", locus_name);
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the LocusInfo stored at a given position of the AnalyzedLoci.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci, or if the
+ *        AnalyzedLoci raises one (reported as "no locus defined here").
+ * @throw IndexOutOfBoundsException if the AnalyzedLoci raises one; the
+ *        re-thrown exception carries locus_position and the caught bounds.
+ */
+const LocusInfo& DataSet::getLocusInfoAtPosition(size_t locus_position) const throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::getLocusInfoAtPosition: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    return analyzedLoci_->getLocusInfoAtPosition(locus_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::getLocusInfoAtPosition: locus_position out of bounds.", locus_position, oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (const NullPointerException&)
+  {
+    throw NullPointerException("DataSet::getLocusInfoAtPosition: no locus defined here");
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Add an AlleleInfo to the locus with the given name.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw LocusNotFoundException if the AnalyzedLoci raises one.
+ * @throw BadIdentifierException if the AnalyzedLoci raises one (reported as
+ *        "allele's id already in use").
+ */
+void DataSet::addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele) throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::addAlleleInfoByLocusName: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    analyzedLoci_->addAlleleInfoByLocusName(locus_name, allele);
+  }
+  catch (LocusNotFoundException& missingLocus)
+  {
+    throw LocusNotFoundException("DataSet::addAlleleInfoByLocusName: locus_name not found.", missingLocus.getIdentifier());
+  }
+  catch (BadIdentifierException& badId)
+  {
+    throw BadIdentifierException("DataSet::addAlleleInfoByLocusName: allele's id already in use.", badId.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Add an AlleleInfo to the locus stored at the given position.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw IndexOutOfBoundsException if the AnalyzedLoci raises one; the
+ *        re-thrown exception carries locus_position and the caught bounds.
+ * @throw BadIdentifierException if the AnalyzedLoci raises one (reported as
+ *        the allele's id being already in use).
+ */
+void DataSet::addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele) throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::addAlleleInfoByLocusPosition: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    analyzedLoci_->addAlleleInfoByLocusPosition(locus_position, allele);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::addAlleleInfoByLocusPosition: locus_position out of bounds.", locus_position, oob.getBounds()[0], oob.getBounds()[1]);
+  }
+  catch (BadIdentifierException& badId)
+  {
+    throw BadIdentifierException("DataSet::addAlleleInfoByLocusPosition: allele'e id already in use.", badId.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the number of loci reported by the AnalyzedLoci.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ */
+size_t DataSet::getNumberOfLoci() const throw (NullPointerException)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::getNumberOfLoci: there's no AnalyzedLoci.");
+  }
+  return analyzedLoci_->getNumberOfLoci();
+}
+
+/******************************************************************************/
+
+/**
+ * Return the ploidy of the locus with the given name.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw LocusNotFoundException if the AnalyzedLoci raises one.
+ */
+size_t DataSet::getPloidyByLocusName(const std::string& locus_name) const throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::getPloidyByLocusName: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    return analyzedLoci_->getPloidyByLocusName(locus_name);
+  }
+  catch (LocusNotFoundException& missingLocus)
+  {
+    throw LocusNotFoundException("DataSet::getPloidyByLocusName: locus_name not found.", missingLocus.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+/**
+ * Return the ploidy of the locus stored at the given position.
+ *
+ * @throw NullPointerException if the DataSet has no AnalyzedLoci.
+ * @throw IndexOutOfBoundsException if the AnalyzedLoci raises one.
+ */
+size_t DataSet::getPloidyByLocusPosition(size_t locus_position) const throw (Exception)
+{
+  if (!analyzedLoci_)
+  {
+    throw NullPointerException("DataSet::getPloidyByLocusPosition: there's no AnalyzedLoci.");
+  }
+
+  try
+  {
+    return analyzedLoci_->getPloidyByLocusPosition(locus_position);
+  }
+  catch (IndexOutOfBoundsException& oob)
+  {
+    throw IndexOutOfBoundsException("DataSet::getPloidyByLocusPosition: locus_position out of bounds.", oob.getBadIndex(), oob.getBounds()[0], oob.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Container extraction -----------------------------------
+
+/**
+ * Build a PolymorphismMultiGContainer from every group of the DataSet.
+ *
+ * For each group position the group name is registered in the container,
+ * then the genotype of every individual that has one is added under that
+ * same group position. The container is allocated with new and returned
+ * as a raw pointer.
+ */
+PolymorphismMultiGContainer* DataSet::getPolymorphismMultiGContainer() const
+{
+  PolymorphismMultiGContainer* container = new PolymorphismMultiGContainer();
+  const size_t groupCount = getNumberOfGroups();
+  for (size_t g = 0; g < groupCount; ++g)
+  {
+    // Name the groups.
+    string groupName = groups_[g]->getGroupName();
+    container->addGroupName(g, groupName);
+
+    const size_t individualCount = getNumberOfIndividualsInGroup(g);
+    for (size_t k = 0; k < individualCount; ++k)
+    {
+      const Individual* individual = getIndividualAtPositionFromGroup(g, k);
+      if (!individual->hasGenotype())
+        continue;
+      const MultilocusGenotype& genotype = individual->getGenotype();
+      container->addMultilocusGenotype(genotype, g);
+    }
+  }
+  return container;
+}
+
+/******************************************************************************/
+
+/**
+ * Build a PolymorphismMultiGContainer from a selection of individuals.
+ *
+ * Each key of selection is a group identifier, resolved with
+ * getGroupPosition(). The associated vector is read as the positions, in
+ * that group, of the individuals to take. (NOTE(review): the previous code
+ * passed the loop counter instead of the stored position, which ignored the
+ * selected values; confirm against the header documentation.)
+ *
+ * For each selected group the group name is registered in the container;
+ * the genotype of every selected individual that has one is then added
+ * under the group position.
+ *
+ * The container is released before any exception leaves this method, and
+ * exceptions are re-thrown unchanged.
+ *
+ * @return A container allocated with new.
+ * @throw GroupNotFoundException as raised by getGroupPosition().
+ * @throw IndexOutOfBoundsException as raised by
+ *        getIndividualAtPositionFromGroup().
+ */
+PolymorphismMultiGContainer* DataSet::getPolymorphismMultiGContainer(const std::map<size_t, std::vector<size_t> >& selection) const throw (Exception)
+{
+  PolymorphismMultiGContainer* pmgc = new PolymorphismMultiGContainer();
+  try
+  {
+    for (map<size_t, vector<size_t> >::const_iterator it = selection.begin(); it != selection.end(); ++it)
+    {
+      const size_t i = getGroupPosition(it->first);
+      string name = groups_[i]->getGroupName();
+      pmgc->addGroupName(i, name);
+
+      const vector<size_t>& positions = it->second;
+      for (size_t j = 0; j < positions.size(); j++)
+      {
+        // Use the selected position, not the loop counter.
+        const Individual* tmp_ind = getIndividualAtPositionFromGroup(i, positions[j]);
+        if (tmp_ind->hasGenotype())
+        {
+          const MultilocusGenotype& tmp_mg = tmp_ind->getGenotype();
+          pmgc->addMultilocusGenotype(tmp_mg, i);
+        }
+      }
+    }
+  }
+  catch (...)
+  {
+    // Do not leak the partially filled container; keep the original exception.
+    delete pmgc;
+    throw;
+  }
+  return pmgc;
+}
+
+/******************************************************************************/
+
+/**
+ * Build a PolymorphismSequenceContainer from a selection of individuals,
+ * taking from each the sequence stored at sequence_position.
+ *
+ * Each key of selection is a group identifier, resolved with
+ * getGroupPosition(). The associated vector is read as the positions, in
+ * that group, of the individuals to take. (NOTE(review): the previous code
+ * passed the loop counter instead of the stored position, which ignored the
+ * selected values; confirm against the header documentation.)
+ *
+ * Individuals without a sequence at sequence_position are skipped. Each
+ * added sequence is tagged with the group identifier through setGroupId(),
+ * keyed by the sequence name.
+ *
+ * The container is released before any exception leaves this method, and
+ * exceptions are re-thrown unchanged.
+ *
+ * @return A container allocated with new, built on getAlphabet().
+ * @throw NullPointerException as raised by getAlphabet().
+ * @throw GroupNotFoundException as raised by getGroupPosition().
+ * @throw IndexOutOfBoundsException as raised by
+ *        getIndividualAtPositionFromGroup().
+ */
+PolymorphismSequenceContainer* DataSet::getPolymorphismSequenceContainer(const std::map<size_t, std::vector<size_t> >& selection, size_t sequence_position) const throw (Exception)
+{
+  PolymorphismSequenceContainer* psc = new PolymorphismSequenceContainer(getAlphabet());
+  try
+  {
+    for (map<size_t, vector<size_t> >::const_iterator it = selection.begin(); it != selection.end(); ++it)
+    {
+      const size_t i = getGroupPosition(it->first);
+
+      const vector<size_t>& positions = it->second;
+      for (size_t j = 0; j < positions.size(); j++)
+      {
+        // Use the selected position, not the loop counter.
+        const Individual* tmp_ind = getIndividualAtPositionFromGroup(i, positions[j]);
+        if (tmp_ind->hasSequenceAtPosition(sequence_position))
+        {
+          const Sequence* tmp_seq = &tmp_ind->getSequenceAtPosition(sequence_position);
+          psc->addSequence(*tmp_seq, 1, false);
+          psc->setGroupId((const string) (tmp_seq->getName()), it->first);
+        }
+      }
+    }
+  }
+  catch (...)
+  {
+    // Do not leak the partially filled container; keep the original exception.
+    delete psc;
+    throw;
+  }
+  return psc;
+}
+
+/******************************************************************************/
+
+// General tests ------------------------------------------
+
+/**
+ * Tell whether the DataSet holds an AnalyzedSequences object.
+ */
+bool DataSet::hasSequenceData() const
+{
+  const bool present = !(analyzedSequences_ == 0);
+  return present;
+}
+
+/******************************************************************************/
+
+/**
+ * Tell whether the DataSet holds an AnalyzedLoci object.
+ */
+bool DataSet::hasAlleleicData() const
+{
+  const bool present = !(analyzedLoci_ == 0);
+  return present;
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/DataSet.h b/src/Bpp/PopGen/DataSet.h
new file mode 100644
index 0000000..c54e4ca
--- /dev/null
+++ b/src/Bpp/PopGen/DataSet.h
@@ -0,0 +1,695 @@
+//
+// File DataSet.h
+// Author : Sylvain Gaillard
+// Last modification : April 4, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _DATASET_H_
+#define _DATASET_H_
+
+// From the STL
+#include <algorithm>
+#include <vector>
+#include <map>
+#include <string>
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Graphics/Point2D.h>
+#include <Bpp/Utils/MapTools.h>
+
+// From PopGenLib (local)
+#include "Group.h"
+#include "Individual.h"
+#include "Locality.h"
+#include "GeneralExceptions.h"
+#include "AnalyzedLoci.h"
+#include "AnalyzedSequences.h"
+#include "PolymorphismMultiGContainer.h"
+#include "PolymorphismSequenceContainer.h"
+
+namespace bpp
+{
+/**
+ * @brief The DataSet class.
+ *
+ * A DataSet is the object that manages all the data on which one can compute
+ * statistics.
+ *
+ * @author Sylvain Gaillard
+ */
+class DataSet
+{
+private:
+ AnalyzedLoci* analyzedLoci_;
+ AnalyzedSequences* analyzedSequences_;
+ std::vector<Locality<double>*> localities_;
+ std::vector<Group*> groups_;
+
+public:
+ // Constructor and destructor
+ /**
+ * @brief Build a new void DataSet.
+ */
+ DataSet();
+
+ /**
+ * @brief Destroy a DataSet.
+ */
+ ~DataSet();
+
+ /**
+ * @brief Copy constructor.
+ */
+ DataSet(const DataSet& ds);
+
+ DataSet& operator=(const DataSet& ds);
+
+public:
+ // Methods
+// ** Locality manipulation ***************************************************/
+ /**
+ * @brief Add a locality to the DataSet.
+ *
+ * @param locality A Locality object.
+ * @throw BadIdentifierException if the locality's name already exists.
+ */
+ void addLocality(Locality<double>& locality) throw (BadIdentifierException);
+
+ /**
+ * @brief Get the position of a locality in the container.
+ *
+ * @return The locality_position (position) of the Locality.
+ * @param name The locality's name to find.
+ * @throw LocalityNotFoundException if the locality's name doesn't match any name in the DataSet.
+ */
+ size_t getLocalityPosition(const std::string& name) const throw (LocalityNotFoundException);
+
+ /**
+ * @brief Get a Locality by locality_position.
+ *
+ * @return A const pointer to the locality matching the locality_position.
+ * @param locality_position The position of the Locality in the DataSet.
+ * @throw IndexOutOfBoundsException if locality_position exceeds the number of localities in the DataSet.
+ */
+ const Locality<double>& getLocalityAtPosition(size_t locality_position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get a Locality by name.
+ *
+ * @throw LocalityNotFoundException if the locality's name is not found.
+ */
+ const Locality<double>& getLocalityByName(const std::string& name) const throw (LocalityNotFoundException);
+
+ /**
+ * @brief Delete a Locality from the DataSet.
+ *
+ * @throw IndexOutOfBoundsException if locality_position excedes the number of Locality.
+ */
+ void deleteLocalityAtPosition(size_t locality_position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete a Locality from the DataSet.
+ *
+ * @throw LocalityNotFoundException if the locality's name is not found.
+ */
+ void deleteLocalityByName(const std::string& name) throw (LocalityNotFoundException);
+
+ /**
+ * @brief Get the number of Localities.
+ */
+ size_t getNumberOfLocalities() const;
+
+ /**
+ * @brief Tell if there is at least one locality.
+ */
+ bool hasLocality() const;
+
+ // ** Group manipulation ******************************************************/
+ /**
+ * @brief Add a Group to the DataSet.
+ *
+ * Add a Group to the DataSet.
+ *
+ * @param group The Group to add (passed by const reference).
+ */
+ void addGroup(const Group& group) throw (BadIdentifierException);
+
+ /**
+ * @brief Add an empty Group to the DataSet.
+ */
+ void addEmptyGroup(size_t group_id) throw (BadIdentifierException);
+
+ /**
+ * @brief Get a group by identifier.
+ */
+ const Group& getGroupById(size_t group_id) const throw (GroupNotFoundException);
+
+ /**
+ * @brief Get the position of a Group.
+ *
+ * @throw GroupNotFoundException if the group_id is not found.
+ */
+ size_t getGroupPosition(size_t group_id) const throw (GroupNotFoundException);
+
+ /**
+ * @brief Get the name of a Group. If the name is an empty string it just returns the group_id
+ *
+ * @throw GroupNotFoundException if the group_id is not found.
+ */
+ std::string getGroupName(size_t group_id) const throw (GroupNotFoundException);
+ /**
+ * @brief set the name of a Group.
+ *
+ * @throw GroupNotFoundException if the group_id is not found.
+ */
+ void setGroupName(size_t group_id, const std::string& group_name) const throw (GroupNotFoundException);
+
+ /**
+ * @brief Get a group by position.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ */
+ const Group& getGroupAtPosition(size_t group_position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete a Group from the DataSet.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ */
+ void deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the number of Groups.
+ */
+ size_t getNumberOfGroups() const;
+
+ /**
+ * @brief Merge two groups.
+ *
+ * This method merges two groups. The source group is emptied into the target
+ * and is then deleted.
+ */
+ void mergeTwoGroups(size_t source_id, size_t target_id) throw (GroupNotFoundException);
+
+ /**
+ * @brief Merge some Groups in one.
+ *
+ * Merge all the groups which are specified in the first one (smallest
+ * identifier). When a group is merged to the first, it is deleted from the
+ * DataSet.
+ *
+ * @param group_ids A vector of size_t listing the ids of the groups to merge.
+ * @throw GroupNotFoundException if one of the ids in group_ids is not found.
+ */
+ void mergeGroups(std::vector<size_t>& group_ids) throw (GroupNotFoundException);
+
+ /**
+ * @brief Split a group in two.
+ *
+ * @param group_id The identifier of the source group.
+ * @param individuals_selection The positions of the Individuals to extract from the group to make the new group.
+ * @throw GroupNotFoundException if the group_id is not found.
+ * @throw IndexOutOfBoundsException if one position of the selection excedes the number of individuals of the group.
+ */
+ void splitGroup(size_t group_id, std::vector<size_t> individuals_selection) throw (Exception);
+
+ // ** Individuals manipulation ************************************************/
+ /**
+ * @brief Add an Individual to a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw BadIdentifierException if the individual's id is already in use.
+ */
+ void addIndividualToGroup(size_t group_position, const Individual& individual) throw (Exception);
+
+ /**
+ * @brief Add an empty Individual to a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw BadIdentifierException if the individual's id is already in use.
+ */
+ void addEmptyIndividualToGroup(size_t group_position, const std::string& individual_id) throw (Exception);
+
+ /**
+ * @brief Get the number of Individuals in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ */
+ size_t getNumberOfIndividualsInGroup(size_t group_position) const
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the position of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ */
+ size_t getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const
+ throw (Exception);
+ /**
+ * @brief Get an Individual from a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ const Individual* getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get an Individual from a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ */
+ const Individual* getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const
+ throw (Exception);
+
+ /**
+ * @brief Delete an Individual from a group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete an Individual from a group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ */
+ void deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id)
+ throw (Exception);
+
+ /**
+ * @brief Set the sex of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the sex of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set the Date of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the Date of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no date.
+ */
+ const Date* getIndividualDateInGroup(size_t group_position, size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Set the coordinates of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D<double>& coord)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the coordinate of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no coordinate.
+ */
+ const Point2D<double>* getIndividualCoordInGroup(size_t group_position, size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Set the Locality of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw LocalityNotFoundException if locality_name is not found.
+ */
+ void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name)
+ throw (Exception);
+
+ /**
+ * @brief Get the Locality of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no locality.
+ */
+ const Locality<double>* getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Add a Sequence to an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
+ * @throw BadIdentifierException if the sequence's name is already in use.
+ */
+ void addIndividualSequenceInGroup(size_t group_position, size_t individual_position,
+ size_t sequence_position, const Sequence& sequence)
+ throw (Exception);
+
+ /**
+ * @brief Get a Sequence from an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ * @throw SequenceNotFoundException if sequence_name is not found.
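+ * @throw BadIntegerException if sequence_position is already in use (NOTE(review): this getter takes no sequence_position; the clause looks copied from addIndividualSequenceInGroup - confirm).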
+ */
+ const Sequence& getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const
+ throw (Exception);
+
+ /**
+ * @brief Get a Sequence from an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ * @throw SequenceNotFoundException if sequence_position is not found.
+ */
+ const Sequence& getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const
+ throw (Exception);
+
+ /**
+ * @brief Delete a Sequence of an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name)
+ throw (Exception);
+
+ /**
+ * @brief Delete a Sequence of an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ * @throw SequenceNotFoundException if sequence_position is not found.
+ */
+ void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position)
+ throw (Exception);
+
+ /**
+ * @brief Get the Sequences' names from an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ */
+ std::vector<std::string> getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Get the position of a Sequence in an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const
+ throw (Exception);
+
+ /**
+ * @brief Get the number of Sequences in an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no sequences.
+ */
+ size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Set the MultilocusGenotype of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype)
+ throw (Exception);
+
+ /**
+ * @brief Initialize the genotype of an Individual in a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw BadIntegerException if the number of loci is < 1;
+ * @throw NullPointerException if analyzed_loci is NULL.
+ * @throw Exception if the individual already has a genotype.
+ */
+ void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
+ throw (Exception);
+
+ /**
+ * @brief Delete the MultilocusGenotype of an Individual from a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ */
+ void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual from a group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position excedes the number of locus.
+ */
+ void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen)
+ throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual from a group, given its allele keys.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position excedes the number of locus.
+ * @throw Exception if the ploidy doesn't match.
+ */
+ void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<size_t> allele_keys)
+ throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual from a group, given its allele ids.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position excedes the number of locus.
+ * @throw Exception if there is no key in allele_keys.
+ */
+ void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<std::string> allele_id)
+ throw (Exception);
+
+ /**
+ * @brief Get a MonolocusGenotype from an Individual of a Group.
+ *
+ * @throw IndexOutOfBoundsException if group_position excedes the number of groups.
+ * @throw IndexOutOfBoundsException if individual_position excedes the number of individual in the group.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position excedes the number of locus.
+ * @throw AlleleNotFoundException if at least one of the id is not found.
+ */
+ const MonolocusGenotype* getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const
+ throw (Exception);
+
+ // ** AnalyzedSequences manipulation ******************************************/
+ /**
+ * @brief Set the alphabet of the AnalyzedSequences.
+ */
+ void setAlphabet(const Alphabet* alpha);
+
+ /**
+ * @brief Set the alphabet of the AnalyzedSequences by its type.
+ */
+ void setAlphabet(const std::string& alpha_type);
+
+ /**
+ * @brief Get the alphabet if there is sequence data.
+ *
+ * @throw NullPointerException if there is no sequence data.
+ */
+ const Alphabet* getAlphabet() const throw (NullPointerException);
+
+ /**
+ * @brief Get the alphabet type as a string.
+ *
+ * @throw NullPointerException if there is no sequence data.
+ */
+ std::string getAlphabetType() const throw (NullPointerException);
+
+ // ** AnalyzedLoci manipulation ***********************************************/
+ /**
+ * @brief Set the AnalyzedLoci to the DataSet.
+ *
+ * @throw Exception if at least one Individual has a genotype referring to the current AnalyzedLoci.
+ */
+ void setAnalyzedLoci(const AnalyzedLoci& analyzedLoci) throw (Exception);
+
+ /**
+ * @brief Initialize the AnalyzedLoci for number of loci.
+ *
+ * @throw Exception if the AnalyzedLoci has already been initialized.
+ */
+ void initAnalyzedLoci(size_t number_of_loci) throw (Exception);
+
+ /**
+ * @brief Get the AnalyzedLoci if there is one.
+ *
+ * @throw NullPointerException if there is no AnalyzedLoci.
+ */
+ const AnalyzedLoci* getAnalyzedLoci() const throw (NullPointerException);
+
+ /**
+ * @brief Delete the AnalyzedLoci.
+ */
+ void deleteAnalyzedLoci();
+
+ /**
+ * @brief Set a LocusInfo.
+ *
+ * @throw NullPointerException if there is no AnalyzedLoci to setup.
+ * @throw IndexOutOfBoundsException if locus_position excedes the total of LocusInfo of the DataSet.
+ */
+ void setLocusInfo(size_t locus_position, const LocusInfo& locus)
+ throw (Exception);
+
+ /**
+ * @brief Get a LocusInfo by its name.
+ */
+ const LocusInfo& getLocusInfoByName(const std::string& locus_name) const
+ throw (Exception);
+
+ /**
+ * @brief Get a LocusInfo by its position.
+ */
+ const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const
+ throw (Exception);
+
+ /**
+ * @brief Add an AlleleInfo to a LocusInfo.
+ */
+ void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele)
+ throw (Exception);
+
+ /**
+ * @brief Add an AlleleInfo to a LocusInfo.
+ */
+ void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele)
+ throw (Exception);
+
+ /**
+ * @brief Get the number of loci.
+ */
+ size_t getNumberOfLoci() const throw (NullPointerException);
+
+ /**
+ * @brief Get the ploidy of a locus.
+ */
+ size_t getPloidyByLocusName(const std::string& locus_name) const throw (Exception);
+
+ /**
+ * @brief Get the ploidy of a locus.
+ */
+ size_t getPloidyByLocusPosition(size_t locus_position) const throw (Exception);
+
+ // ** Container extraction ***************************************************/
+ /**
+ * @brief Get a PolymorphismMultiGContainer with all allelic data of the DataSet.
+ */
+ PolymorphismMultiGContainer* getPolymorphismMultiGContainer() const;
+
+ /**
+ * @brief Get a PolymorphismMultiGContainer from a selection of groups and individuals.
+ *
+ * @param selection A map with groups id as keys and vector of individuals position in each group as values.
+ */
+ PolymorphismMultiGContainer* getPolymorphismMultiGContainer(const std::map<size_t, std::vector<size_t> >& selection) const throw (Exception);
+
+ /**
+ * @brief Get a PolymorphismSequenceContainer from a selection of groups and individuals.
+ *
+ * All the sequences are ingroup. You may change their state after creating the container.
+ * @param selection A map with groups id as keys and vector of individuals position in each group as values.
+ * @param sequence_position The position of the sequence in the individuals;
+ */
+ PolymorphismSequenceContainer* getPolymorphismSequenceContainer(const std::map<size_t, std::vector<size_t> >& selection, size_t sequence_position) const throw (Exception);
+
+ // ** General tests **********************************************************/
+ /**
+ * @brief Tell if at least one individual has at least one sequence.
+ */
+ bool hasSequenceData() const;
+
+ /**
+ * @brief Tell if there is allelic data.
+ */
+ bool hasAlleleicData() const;
+};
+} // end of namespace bpp;
+
+#endif // _DATASET_H_
+
diff --git a/src/Bpp/PopGen/DataSetTools.cpp b/src/Bpp/PopGen/DataSetTools.cpp
new file mode 100644
index 0000000..7e9be72
--- /dev/null
+++ b/src/Bpp/PopGen/DataSetTools.cpp
@@ -0,0 +1,90 @@
+//
+// File DataSetTools.cpp
+// Author : Sylvain Gaillard
+// Last modification : Wednesday August 04 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "DataSetTools.h"
+
+using namespace bpp;
+using namespace std;
+
+// Build a DataSet holding a single group (id 0) in which every sequence of
+// osc becomes the only sequence (position 0) of its own individual, named
+// "Individual_1", "Individual_2", ... in container order.
+// Any Exception raised by the DataSet calls propagates to the caller with its
+// dynamic type intact (the former catch + "throw e;" re-threw a sliced copy
+// of static type Exception).
+std::auto_ptr<DataSet> DataSetTools::buildDataSet(const OrderedSequenceContainer& osc) throw (Exception)
+{
+  auto_ptr<DataSet> d_s(new DataSet());
+  d_s->addEmptyGroup(0);
+  const size_t nbSequences = osc.getNumberOfSequences();
+  for (size_t i = 0; i < nbSequences; i++)
+  {
+    d_s->addEmptyIndividualToGroup(0, string("Individual_") + TextTools::toString(i + 1));
+    d_s->addIndividualSequenceInGroup(0, i, 0, osc.getSequence(i));
+  }
+  return d_s;
+}
+
+// Build a DataSet with one group per group id found in psc. Each sequence i
+// yields getSequenceCount(i) individuals ("Individual_0", "Individual_1", ...),
+// each carrying that sequence at sequence position 0.
+// DataSet addresses groups and individuals by position, so the group id is
+// translated with getGroupPosition() and the new individual is located by id
+// (the original passed the group id and the sequence index i as positions).
+std::auto_ptr<DataSet> DataSetTools::buildDataSet(const PolymorphismSequenceContainer& psc) throw (Exception)
+{
+  auto_ptr<DataSet> d_s(new DataSet());
+  const set<size_t> grp_ids = psc.getAllGroupsIds();
+  for (set<size_t>::const_iterator it = grp_ids.begin(); it != grp_ids.end(); ++it)
+    d_s->addEmptyGroup(*it);
+  size_t ind_count = 0;
+  for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
+  {
+    const size_t grp_pos = d_s->getGroupPosition(psc.getGroupId(i));
+    for (size_t j = 0; j < psc.getSequenceCount(i); j++)
+    {
+      const string ind_id = string("Individual_") + TextTools::toString(ind_count++);
+      d_s->addEmptyIndividualToGroup(grp_pos, ind_id);
+      // No try/catch + "throw e;" here: that re-threw a sliced Exception copy.
+      d_s->addIndividualSequenceInGroup(grp_pos, d_s->getIndividualPositionInGroup(grp_pos, ind_id), 0, psc.getSequence(i));
+    }
+  }
+  return d_s;
+}
+
diff --git a/src/Bpp/PopGen/DataSetTools.h b/src/Bpp/PopGen/DataSetTools.h
new file mode 100644
index 0000000..5b1d461
--- /dev/null
+++ b/src/Bpp/PopGen/DataSetTools.h
@@ -0,0 +1,80 @@
+//
+// File DataSetTools.h
+// Author : Sylvain Gaillard
+// Last modification : Wednesday August 04 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _DATASETTOOLS_H_
+#define _DATASETTOOLS_H_
+
+// From STL
+#include <set>
+#include <memory>
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Text/TextTools.h>
+
+// From SeqLib
+#include <Bpp/Seq/Container/OrderedSequenceContainer.h>
+
+// From local PopGenLib
+#include "DataSet.h"
+#include "PolymorphismSequenceContainer.h"
+
+namespace bpp
+{
+/**
+ * @brief A set of tools for DataSet.
+ *
+ * @author Sylvain Gaillard
+ */
+class DataSetTools
+{
+public:
+ /**
+ * @brief Build a DataSet with a single group (id 0) holding one individual per sequence of the OrderedSequenceContainer.
+ */
+ static std::auto_ptr<DataSet> buildDataSet(const OrderedSequenceContainer& osc) throw (Exception);
+
+ /**
+ * @brief Build a DataSet from a PolymorphismSequenceContainer: one group per group id, and for each sequence as many individuals as its sequence count.
+ */
+ static std::auto_ptr<DataSet> buildDataSet(const PolymorphismSequenceContainer& psc) throw (Exception);
+};
+} // end of namespace bpp;
+
+#endif // _DATASETTOOLS_H_
+
diff --git a/src/Bpp/PopGen/Date.cpp b/src/Bpp/PopGen/Date.cpp
new file mode 100644
index 0000000..ac5e9d3
--- /dev/null
+++ b/src/Bpp/PopGen/Date.cpp
@@ -0,0 +1,138 @@
+//
+// File Date.cpp
+// Author : Sylvain Gaillard <yragael2001 at yahoo.fr>
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include <Bpp/Text/TextTools.h>
+
+// From Local
+#include "Date.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Construct from day in [1;31] and month in [1;12]; any year is accepted.
+Date::Date(const int day, const int month, const int year) throw (BadIntegerException) :
+  day_(day), month_(month), year_(year)
+{
+  if (!(day >= 1 && day <= 31))
+    throw BadIntegerException("Date::Date: day must be in [1;31].", day);
+  if (!(month >= 1 && month <= 12))
+    throw BadIntegerException("Date::Date: month must be in [1;12].", month);
+}
+
+// Copy constructor: takes day, month and year from date through its getters.
+Date::Date(const Date& date) :
+  day_(date.getDay()), month_(date.getMonth()), year_(date.getYear()) {}
+
+// ** Class destructor: ********************************************************/
+
+Date::~Date() {} // Empty body: the destructor has nothing to do.
+
+// ** Other methods: ***********************************************************/
+
+Date& Date::operator=(const Date& date) // member-wise copy, returns *this
+{
+  year_ = date.getYear();
+  month_ = date.getMonth();
+  day_ = date.getDay();
+  return *this;
+}
+
+// Set all three fields; day ([1;31]) and month ([1;12]) are both checked before
+// any member is written, so a rejected call no longer leaves a half-updated date.
+void Date::setDate(const int day, const int month, const int year) throw (BadIntegerException)
+{
+  if (day < 1 || day > 31)
+    throw (BadIntegerException("Date::Date: day must be in [1;31].", day));
+  if (month < 1 || month > 12)
+    throw (BadIntegerException("Date::Date: month must be in [1;12].", month));
+  day_ = day;
+  month_ = month;
+  year_ = year;
+}
+
+void Date::setYear(const int year) // Unlike setDay/setMonth, no range check is applied.
+{
+ year_ = year;
+}
+
+// Change the month only; values outside [1;12] raise BadIntegerException.
+void Date::setMonth(const int month) throw (BadIntegerException)
+{
+  if (month < 1 || month > 12)
+    throw BadIntegerException("Date::Date: month must be in [1;12].", month);
+  month_ = month;
+}
+
+// Change the day only; values outside [1;31] raise BadIntegerException.
+void Date::setDay(const int day) throw (BadIntegerException)
+{
+  if (day < 1 || day > 31)
+    throw BadIntegerException("Date::Date: day must be in [1;31].", day);
+  day_ = day;
+}
+
+// Render the date as day, month, year concatenated with no separator; day and
+// month below 10 get a leading "0" (e.g. 29 July 2004 -> "29072004"); the
+// year is written as-is, without padding.
+std::string Date::getDateStr() const
+{
+  const string dayPart = string(day_ < 10 ? "0" : "") + TextTools::toString(day_);
+  const string monthPart = string(month_ < 10 ? "0" : "") + TextTools::toString(month_);
+  const string yearPart = TextTools::toString(year_);
+  return dayPart + monthPart + yearPart;
+}
+
+// Equality: true only when day, month and year are all identical.
+bool Date::operator==(const Date& date) const
+{
+  return day_ == date.getDay()
+         && month_ == date.getMonth()
+         && year_ == date.getYear();
+}
+
+// Strict chronological order: compare years, then months, then days.
+bool Date::operator<(const Date& date) const
+{
+  if (year_ != date.getYear()) return year_ < date.getYear();
+  if (month_ != date.getMonth()) return month_ < date.getMonth();
+  return day_ < date.getDay();
+}
+
diff --git a/src/Bpp/PopGen/Date.h b/src/Bpp/PopGen/Date.h
new file mode 100644
index 0000000..38dc499
--- /dev/null
+++ b/src/Bpp/PopGen/Date.h
@@ -0,0 +1,196 @@
+//
+// File Date.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _DATE_H_
+#define _DATE_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Clonable.h>
+
+namespace bpp
+{
+/**
+ * @brief The Date class
+ *
+ * This is a little class to deal with dates.
+ *
+ * @author Sylvain Gaillard
+ */
+class Date : public Clonable
+{
+private:
+ // Calendar fields; the setters only range-check day ([1;31]) and month ([1;12]).
+ int day_;
+ int month_;
+ int year_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a new Date from three values.
+ *
+ * Build a new Date from three integers.
+ * The default Date is set to 01-01-2000.
+ *
+ * @param day The day between 1 and 31.
+ * @param month The month between 1 and 12.
+ * @param year The year as a signed int.
+ * @throw BadIntegerException Declared in the exception specification;
+ * presumably raised for an out-of-range day or month (TODO confirm
+ * against the constructor body in Date.cpp).
+ */
+ Date(const int day = 1, const int month = 1, const int year = 2000) throw (BadIntegerException);
+
+ /**
+ * @brief The Date copy constructor.
+ */
+ Date(const Date& date);
+
+ /**
+ * @brief Destroy the Date object.
+ */
+ ~Date();
+
+public:
+ // Methods
+ /**
+ * @brief The Date copy operator.
+ *
+ * @return A ref toward the assigned Date.
+ */
+ Date& operator=(const Date& date);
+
+ /**
+ * @brief Set the Date.
+ *
+ * Only the ranges below are checked; the day is not validated against
+ * the month or the year.
+ *
+ * @param day The day as an integer between 1 and 31.
+ * @param month The month as an integer between 1 and 12.
+ * @param year The year as an integer.
+ * @throw BadIntegerException if day or month is out of range.
+ */
+ void setDate(const int day, const int month, const int year) throw (BadIntegerException);
+
+ /**
+ * @brief Set the year.
+ *
+ * @param year The year as an integer.
+ */
+ void setYear(const int year);
+
+ /**
+ * @brief Set the month.
+ *
+ * @param month The month as an integer between 1 and 12.
+ * @throw BadIntegerException if month is out of range.
+ */
+ void setMonth(const int month) throw (BadIntegerException);
+
+ /**
+ * @brief Set the day.
+ *
+ * @param day The day as an integer between 1 and 31.
+ * @throw BadIntegerException if day is out of range.
+ */
+ void setDay(const int day) throw (BadIntegerException);
+
+ /**
+ * @brief Get the Date as a string.
+ *
+ * Day and month are zero-padded to two digits; the year is written
+ * without padding.
+ *
+ * @return The date as a string DDMMYYYY (i.e. January 1 2000 : 01012000).
+ */
+ std::string getDateStr() const;
+
+ /**
+ * @brief Get the Year as an int.
+ */
+ int getYear() const { return year_; }
+
+ /**
+ * @brief Get the month as an int.
+ */
+ int getMonth() const { return month_; }
+
+ /**
+ * @brief Get the day as an int.
+ */
+ int getDay() const { return day_; }
+
+ /**
+ * @brief The == operator.
+ *
+ * Test the numerical equality between two dates.
+ */
+ bool operator==(const Date& date) const;
+
+ /**
+ * @brief The < operator.
+ *
+ * Return true if the left Date is earlier than the right Date.
+ */
+ bool operator<(const Date& date) const;
+
+ /**
+ * @brief The != operator.
+ *
+ * Defined as the negation of operator==.
+ */
+ bool operator!=(const Date& date) const { return !(*this == date); }
+
+ /**
+ * @brief The > operator.
+ *
+ * Defined through operator< with the operands swapped.
+ */
+ bool operator>(const Date& date) const { return date < *this; }
+
+ /**
+ * @brief The <= operator.
+ *
+ * Defined as "not greater than", i.e. !(date < *this).
+ */
+ bool operator<=(const Date& date) const { return !(date < *this); }
+
+ /**
+ * @brief The >= operator.
+ *
+ * Defined as "not less than", i.e. !(*this < date).
+ */
+ bool operator>=(const Date& date) const { return !(*this < date); }
+
+ /**
+ * @name The Clonable interface
+ * @{
+ */
+ // Returns a newly allocated copy of this Date.
+ // NOTE(review): NO_VIRTUAL_COV presumably targets compilers without
+ // covariant return types - confirm where the macro is defined.
+#ifdef NO_VIRTUAL_COV
+ Clonable*
+#else
+ Date*
+#endif
+ clone() const { return new Date(*this); }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _DATE_H_
+
diff --git a/src/Bpp/PopGen/GeneMapperCsvExport.cpp b/src/Bpp/PopGen/GeneMapperCsvExport.cpp
new file mode 100644
index 0000000..885fe3c
--- /dev/null
+++ b/src/Bpp/PopGen/GeneMapperCsvExport.cpp
@@ -0,0 +1,235 @@
+//
+// File: GeneMapperCsvExport.cpp
+// Author: Sylvain Gaillard
+// Created: April 2, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 2, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "GeneMapperCsvExport.h"
+
+using namespace bpp;
+using namespace std;
+
+// Column headers of a GeneMapper export file.
+// The trailing space in "Allele ", "Size ", "Height " and "Peak Area " is part
+// of the literal; read() matches ALLELE_H as a column-name prefix.
+const std::string GeneMapperCsvExport::SAMPLE_FILE_H("Sample File");
+const std::string GeneMapperCsvExport::SAMPLE_NAME_H("Sample Name");
+const std::string GeneMapperCsvExport::PANEL_H("Panel");
+const std::string GeneMapperCsvExport::MARKER_H("Marker");
+const std::string GeneMapperCsvExport::DYE_H("Dye");
+const std::string GeneMapperCsvExport::ALLELE_H("Allele ");
+const std::string GeneMapperCsvExport::SIZE_H("Size ");
+const std::string GeneMapperCsvExport::HEIGHT_H("Height ");
+const std::string GeneMapperCsvExport::PEAK_AREA_H("Peak Area ");
+const std::string GeneMapperCsvExport::DAC_H("DAC");
+const std::string GeneMapperCsvExport::AN_H("AN");
+
+/**
+ * Build a reader.
+ *
+ * @param ia Value stored in IndependentAlleles_.
+ */
+GeneMapperCsvExport::GeneMapperCsvExport(bool ia) :
+  IndependentAlleles_(ia)
+{}
+
+/**
+ * Destructor; the class holds no resource that needs releasing.
+ */
+GeneMapperCsvExport::~GeneMapperCsvExport()
+{}
+
+/**
+ * Read a tab-separated GeneMapper export from a stream into a DataSet.
+ *
+ * The stream is loaded into a DataTable (first line = column names).
+ * A group with id 0 is added to data_set, one Individual is created per
+ * distinct sample name, one locus per distinct marker, and every column whose
+ * name starts with ALLELE_H contributes alleles. Each row then becomes the
+ * multi-allele genotype of one individual at one locus.
+ *
+ * When the same (sample name, marker) pair appears on several rows, the
+ * sample name of the 2nd, 3rd, ... occurrence is suffixed with "_2", "_3", ...
+ *
+ * @param is       The input stream.
+ * @param data_set The DataSet to fill.
+ * @throw IOException if the stream is not readable.
+ * @throw Exception   any exception raised by the DataTable / DataSet calls
+ *                    (for example a missing column) propagates with its
+ *                    original dynamic type.
+ */
+void GeneMapperCsvExport::read(std::istream& is, DataSet& data_set) throw (Exception)
+{
+  if (!is)
+    throw IOException("GeneMapperCsvExport::read: fail to open stream.");
+
+  /*
+   * Feed a DataTable with the data
+   */
+  DataTable* dtp = DataTable::read(is, "\t", true, -1);
+  try
+  {
+    DataTable& dt = *dtp;
+
+    /*
+     * Fix the individuals' names if there are duplicates in the file.
+     * getColumn() throws when a column is missing; the exception is left to
+     * propagate as-is (re-throwing a caught copy would slice it).
+     */
+    vector<string> ind_names = dt.getColumn(SAMPLE_NAME_H);
+    vector<string> markers = dt.getColumn(MARKER_H);
+
+    map<string, int> indname_marker;
+    for (size_t i = 0; i < dt.getNumberOfRows(); i++)
+    {
+      // The tab cannot occur inside a field (it is the column delimiter), so
+      // it keeps ("ab", "c") and ("a", "bc") from sharing the same key.
+      const string test_lab = dt(i, SAMPLE_NAME_H) + "\t" + dt(i, MARKER_H);
+      int& seen = indname_marker[test_lab];
+      if (seen > 0)
+        dt(i, SAMPLE_NAME_H) = dt(i, SAMPLE_NAME_H) + "_" + TextTools::toString(seen + 1);
+      ++seen;
+    }
+    ind_names = dt.getColumn(SAMPLE_NAME_H);
+    ind_names = VectorTools::unique(ind_names);
+    markers = VectorTools::unique(markers);
+
+    /*
+     * Loci number
+     */
+    data_set.initAnalyzedLoci(markers.size());
+
+    /*
+     * Group of individuals.
+     * The group is addressed by its position everywhere below, so the reader
+     * also works when data_set already contained other groups.
+     */
+    data_set.addEmptyGroup(0);
+    const size_t grp_pos = data_set.getGroupPosition(0);
+    for (size_t i = 0; i < ind_names.size(); i++)
+    {
+      Individual ind(ind_names[i]);
+      data_set.addIndividualToGroup(grp_pos, ind);
+    }
+
+    /*
+     * Loci data
+     */
+    AnalyzedLoci al(markers.size());
+    const vector<string> col_names = dt.getColumnNames();
+
+    // Finds columns containing allele data
+    vector<size_t> alleles_cols;
+    for (size_t i = 0; i < col_names.size(); i++)
+    {
+      if (TextTools::startsWith(col_names[i], ALLELE_H))
+        alleles_cols.push_back(i);
+    }
+
+    // Set LocusInfo
+    for (size_t i = 0; i < markers.size(); i++)
+    {
+      al.setLocusInfo(i, LocusInfo(markers[i], LocusInfo::UNKNOWN));
+    }
+
+    // Collect the distinct allele names seen for each marker
+    std::map< std::string, std::set< std::string > > markerAlleles;
+    for (size_t i = 0; i < dt.getNumberOfRows(); ++i)
+    {
+      for (size_t j = 0; j < alleles_cols.size(); ++j)
+      {
+        if (dt(i, alleles_cols[j]) != "")
+          markerAlleles[dt(i, MARKER_H)].insert(dt(i, alleles_cols[j]));
+      }
+    }
+    for (std::map< std::string, std::set< std::string > >::iterator itm = markerAlleles.begin(); itm != markerAlleles.end(); itm++)
+    {
+      std::set< std::string >& s = itm->second;
+      for (std::set< std::string >::iterator its = s.begin(); its != s.end(); its++)
+      {
+        al.addAlleleInfoByLocusName(itm->first, BasicAlleleInfo(*its));
+      }
+    }
+    data_set.setAnalyzedLoci(al);
+
+    /*
+     * Individuals information
+     */
+    const size_t ind_col_index = VectorTools::which(col_names, SAMPLE_NAME_H);
+    const size_t mark_col_index = VectorTools::which(col_names, MARKER_H);
+    for (size_t i = 0; i < dt.getNumberOfRows(); i++)
+    {
+      const string& ind_id = dt(i, ind_col_index);
+      const string& marker = dt(i, mark_col_index);
+
+      vector<size_t> alleles;
+      for (size_t j = 0; j < alleles_cols.size(); j++)
+      {
+        const string& cell = dt(i, alleles_cols[j]);
+        if (!TextTools::isEmpty(cell))
+          alleles.push_back(data_set.getLocusInfoByName(marker).getAlleleInfoKey(cell));
+      }
+      alleles = VectorTools::unique(alleles);
+      MultiAlleleMonolocusGenotype ma(alleles);
+
+      const bool has_genotype = data_set.getIndividualByIdFromGroup(grp_pos, ind_id)->hasGenotype();
+      const size_t ind_pos = data_set.getIndividualPositionInGroup(grp_pos, ind_id);
+      if (!has_genotype)
+        data_set.initIndividualGenotypeInGroup(grp_pos, ind_pos);
+      // Rows without any allele leave the genotype at this locus unset
+      if (alleles.size())
+        data_set.setIndividualMonolocusGenotypeInGroup(grp_pos, ind_pos, data_set.getAnalyzedLoci()->getLocusInfoPosition(marker), ma);
+    }
+  }
+  catch (...)
+  {
+    // Do not leak the table when anything above throws
+    delete dtp;
+    throw;
+  }
+  delete dtp;
+}
+
+/**
+ * Path-based overload: forwards to AbstractIDataSet::read(path, data_set).
+ * NOTE(review): the base class presumably opens the file and calls the
+ * stream overload - confirm in AbstractIDataSet.
+ */
+void GeneMapperCsvExport::read(const std::string& path, DataSet& data_set) throw (Exception)
+{
+ AbstractIDataSet::read(path, data_set);
+}
+
+/**
+ * Stream overload returning a DataSet pointer: forwards to
+ * AbstractIDataSet::read(is) and returns its result unchanged.
+ */
+DataSet* GeneMapperCsvExport::read(std::istream& is) throw (Exception)
+{
+ return AbstractIDataSet::read(is);
+}
+
+/**
+ * Path overload returning a DataSet pointer: forwards to
+ * AbstractIDataSet::read(path) and returns its result unchanged.
+ */
+DataSet* GeneMapperCsvExport::read(const std::string& path) throw (Exception)
+{
+ return AbstractIDataSet::read(path);
+}
+
+// --- GeneMapperCsvExport::Record ---
+/**
+ * Parse one tab-separated row of a GeneMapper export.
+ *
+ * Expected token layout, with n = (number of tokens - 7) / 4 alleles:
+ *   0..4      sample file, sample name, panel, marker, dye
+ *   5..       n allele names, then n sizes, n heights, n peak areas
+ *   last two  DAC, then AN
+ *
+ * @param row One row of the file as a std::string.
+ * @throw Exception if the row holds fewer than the 7 mandatory tokens.
+ *        Exceptions raised by StringTokenizer / TextTools propagate unchanged.
+ */
+GeneMapperCsvExport::Record::Record(const std::string& row) :
+  sampleFile_(),
+  sampleName_(),
+  panel_(),
+  markerName_(),
+  dye_(),
+  alleles_(),
+  dac_(),
+  an_(0.)
+{
+  StringTokenizer st(row, "\t", true, false);
+  const size_t itemNum = st.numberOfRemainingTokens();
+  // Guard the unsigned subtraction below: with fewer than 7 tokens
+  // (itemNum - 7) would wrap around to a huge allele count.
+  if (itemNum < 7)
+    throw Exception("GeneMapperCsvExport::Record::Record: bad number of fields, at least 7 expected.");
+  const size_t alleleNum = (itemNum - 7) / 4;
+
+  sampleFile_ = st.getToken(0);
+  sampleName_ = st.getToken(1);
+  panel_ = st.getToken(2);
+  markerName_ = st.getToken(3);
+  dye_ = st.getToken(4);
+  dac_ = st.getToken(itemNum - 2);
+  an_ = TextTools::toDouble(st.getToken(itemNum - 1));
+
+  // The four allele attributes are stored as four consecutive runs of alleleNum tokens
+  for (size_t i = 0; i < alleleNum; ++i)
+  {
+    GeneMapperCsvExport::Allele al(
+      st.getToken(5 + i),
+      TextTools::toDouble(st.getToken(5 + alleleNum + i)),
+      TextTools::to<unsigned int>(st.getToken(5 + (2 * alleleNum) + i)),
+      TextTools::toDouble(st.getToken(5 + (3 * alleleNum) + i))
+      );
+    alleles_.push_back(al);
+  }
+}
diff --git a/src/Bpp/PopGen/GeneMapperCsvExport.h b/src/Bpp/PopGen/GeneMapperCsvExport.h
new file mode 100644
index 0000000..5f0e97f
--- /dev/null
+++ b/src/Bpp/PopGen/GeneMapperCsvExport.h
@@ -0,0 +1,213 @@
+//
+// File: GeneMapperCsvExport.h
+// Author: Sylvain Gaillard
+// Created: April 2, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 2, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_
+#define _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+#include <Bpp/Numeric/DataTable.h>
+
+// From local Pop
+#include "AbstractIDataSet.h"
+#include "BasicAlleleInfo.h"
+#include "MultiAlleleMonolocusGenotype.h"
+
+namespace bpp
+{
+/**
+ * @brief The GeneMapperCsvExport input format for popgenlib.
+ *
+ * This input format takes a csv file exported from GeneMapper® (Applied Biosystems).
+ *
+ * @author Sylvain Gaillard
+ */
+class GeneMapperCsvExport : public AbstractIDataSet
+{
+public:
+  // Column headers of the GeneMapper export (values are defined in the .cpp file)
+  static const std::string SAMPLE_FILE_H;
+  static const std::string SAMPLE_NAME_H;
+  static const std::string PANEL_H;
+  static const std::string MARKER_H;
+  static const std::string DYE_H;
+  static const std::string ALLELE_H;
+  static const std::string SIZE_H;
+  static const std::string HEIGHT_H;
+  static const std::string PEAK_AREA_H;
+  static const std::string DAC_H;
+  static const std::string AN_H;
+
+private:
+  // Flag received by the constructor.
+  // NOTE(review): no code in the reviewed sources reads it - confirm intended use.
+  bool IndependentAlleles_;
+
+public:
+  // Constructor and destructor
+  /**
+   * @brief Build a reader.
+   *
+   * @param ia Tell if alleles are to be considered as independent markers.
+   */
+  GeneMapperCsvExport(bool ia = false);
+  ~GeneMapperCsvExport();
+
+  // TODO: a setter for the independent-alleles flag
+  // (SetAllelsAsIndependent(bool flag)) was planned but is not implemented.
+
+public:
+  /**
+   * @name The IDataSet interface.
+   * @{
+   */
+  void read(std::istream& is, DataSet& data_set) throw (Exception);
+  void read(const std::string& path, DataSet& data_set) throw (Exception);
+  DataSet* read(std::istream& is) throw (Exception);
+  DataSet* read(const std::string& path) throw (Exception);
+  /**
+   * @}
+   */
+
+  /**
+   * @name The IOFormat interface
+   * @{
+   */
+  virtual const std::string getFormatName() const
+  {
+    // "csv", not "cvs": the previous label was a typo
+    return "GeneMapper® csv export";
+  }
+  virtual const std::string getFormatDescription() const
+  {
+    return "GeneMapper® is a flexible genotyping software package that provides DNA sizing and quality allele calls for all Applied Biosystems electrophoresis-based genotyping systems.";
+  }
+  /**
+   * @}
+   */
+
+  /**
+   * @brief Store data for one allele
+   *
+   * Plain value holder: name, size, height and peak area as given to the
+   * constructor, exposed through read-only accessors.
+   */
+  class Allele
+  {
+private:
+    std::string name_;
+    double size_;
+    unsigned int height_;
+    double peakArea_;
+
+public:
+    Allele(const std::string& name, double size, unsigned int height, double peakArea) :
+      name_(name),
+      size_(size),
+      height_(height),
+      peakArea_(peakArea) {}
+
+    const std::string& getName() const
+    {
+      return name_;
+    }
+    const double& getSize() const
+    {
+      return size_;
+    }
+    const unsigned int& getHeight() const
+    {
+      return height_;
+    }
+    const double& getPeakArea() const
+    {
+      return peakArea_;
+    }
+  };
+
+  /**
+   * @brief Store one line of the GeneMapper file
+   */
+  class Record
+  {
+private:
+    std::string sampleFile_;
+    std::string sampleName_;
+    std::string panel_;
+    std::string markerName_;
+    std::string dye_;
+    std::vector< GeneMapperCsvExport::Allele > alleles_;
+    std::string dac_;
+    double an_;
+
+public:
+    /**
+     * @brief Constructor
+     *
+     * @param row One row of the file as a std::string
+     */
+    Record(const std::string& row);
+
+    const std::string& getSampleFileName() const
+    {
+      return sampleFile_;
+    }
+    const std::string& getSampleName() const
+    {
+      return sampleName_;
+    }
+    const std::string& getPanel() const
+    {
+      return panel_;
+    }
+    const std::string& getMarkerName() const
+    {
+      return markerName_;
+    }
+    const std::string& getDye() const
+    {
+      return dye_;
+    }
+    /**
+     * @return The number of alleles stored in this record.
+     */
+    // Returned by value: a top-level const here had no effect and only
+    // triggered compiler warnings.
+    size_t getNumberOfAllele() const
+    {
+      return alleles_.size();
+    }
+    /**
+     * @param allelePos Index into the stored alleles; no bounds check is done.
+     */
+    const GeneMapperCsvExport::Allele& getAllele(size_t allelePos) const
+    {
+      return alleles_[allelePos];
+    }
+  };
+};
+} // end of namespace bpp;
+
+#endif // _BPP_POPGEN_GENEMAPPERCSVEXPORT_H_
+
diff --git a/src/Bpp/PopGen/Genepop.cpp b/src/Bpp/PopGen/Genepop.cpp
new file mode 100644
index 0000000..69c9241
--- /dev/null
+++ b/src/Bpp/PopGen/Genepop.cpp
@@ -0,0 +1,208 @@
+//
+// File Genepop.cpp
+// Author : Sylvain Gaillard
+// Last modification : Tuesday September 21 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "Genepop.h"
+
+using namespace bpp;
+using namespace std;
+
+/**
+ * Default constructor; there is no state to initialise.
+ */
+Genepop::Genepop()
+{}
+
+/**
+ * Destructor; nothing to release.
+ */
+Genepop::~Genepop()
+{}
+
+/**
+ * Read a Genepop file from a stream into a DataSet, in two passes.
+ *
+ * The first line is skipped. Pass 1 scans the rest of the stream to collect
+ * the locus names (every line before the first "POP" line, split on ", "),
+ * to add one empty group per "POP" line (group ids 1, 2, ...), to count how
+ * often each individual id occurs and to gather the allele ids seen at each
+ * locus. The stream is then rewound to just after the first line and pass 2
+ * creates the individuals and their genotypes.
+ *
+ * An individual line is "<id> , <genotypes>" with one whitespace-separated
+ * genotype per locus; each genotype is cut in the middle into two allele
+ * ids. Ids "00" and "000" are treated as missing: they are not registered as
+ * alleles, and a genotype with a missing half is not set. Ids that occur more
+ * than once are made unique by appending "_1", "_2", ...
+ *
+ * @param is       The input stream; it must support seeking.
+ * @param data_set The DataSet to fill.
+ * @throw IOException if the stream is unreadable or cannot be rewound.
+ * @throw Exception   any exception raised by the DataSet calls.
+ */
+void Genepop::read(istream& is, DataSet& data_set) throw (Exception)
+{
+  if (!is)
+    throw IOException("Genepop::read: fail to open stream.");
+  // Skip first line
+  FileTools::getNextLine(is);
+  const ios::pos_type entry_point = is.tellg();
+  bool loc_def_ok = false;
+  size_t grp_nbr = 0;
+  vector<LocusInfo> tmp_loc;
+  vector<set<string> > al_ids;
+  map<string, size_t> ind_id_count;
+  map<string, size_t> ind_id_index;
+
+  string temp("");
+  // First read : file structure
+  while (is.peek() != EOF)
+  {
+    temp = FileTools::getNextLine(is);
+    const string cp_temp = TextTools::toUpper(TextTools::removeSurroundingWhiteSpaces(temp));
+    if (cp_temp == string("POP"))
+    {
+      if (!loc_def_ok)
+      {
+        // The first POP closes the locus definition: the loci count is now known
+        al_ids.resize(tmp_loc.size());
+        loc_def_ok = true;
+      }
+      grp_nbr++;
+      data_set.addEmptyGroup(grp_nbr);
+    }
+    else if (!loc_def_ok)
+    {
+      // Still in the header: every token is a locus name
+      StringTokenizer st(temp, string(", "), true);
+      while (st.hasMoreToken())
+        tmp_loc.push_back(LocusInfo(TextTools::removeSurroundingWhiteSpaces(st.nextToken())));
+    }
+    else
+    {
+      // Individual line: count the id and register the allele ids per locus
+      string alleles;
+      StringTokenizer st(temp, string(","));
+      if (st.numberOfRemainingTokens() == 2)
+      {
+        ind_id_count[TextTools::removeSurroundingWhiteSpaces(st.nextToken())]++;
+        alleles = st.nextToken();
+      }
+      StringTokenizer st2(alleles);
+      if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size())
+      {
+        size_t i = 0;
+        while (st2.hasMoreToken())
+        {
+          const string ids = TextTools::removeSurroundingWhiteSpaces(st2.nextToken());
+          const size_t half = ids.size() / 2;
+          const string first_id = ids.substr(0, half);
+          const string second_id = ids.substr(half);
+          if (first_id != string("00") && first_id != string("000"))
+            al_ids[i].insert(first_id);
+          if (second_id != string("00") && second_id != string("000"))
+            al_ids[i].insert(second_id);
+          i++;
+        }
+      }
+    }
+  }
+
+  // A file without any POP line never sized al_ids; make sure the indexing
+  // below stays in bounds (no-op otherwise).
+  al_ids.resize(tmp_loc.size());
+
+  // Rewind to entry_point. Reaching the end of the stream leaves eofbit
+  // (and possibly failbit) set, which must be cleared for seekg() to work
+  // and for the eof() test of the second pass to be meaningful.
+  is.clear();
+  is.seekg(entry_point);
+  if (!is)
+    throw IOException("Genepop::read: unable to rewind stream for the second pass.");
+
+  // Set AnalyzedLoci
+  data_set.initAnalyzedLoci(tmp_loc.size());
+  for (size_t i = 0; i < tmp_loc.size(); i++)
+  {
+    data_set.setLocusInfo(i, tmp_loc[i]);
+    for (set<string>::iterator it = al_ids[i].begin(); it != al_ids[i].end(); it++)
+    {
+      data_set.addAlleleInfoByLocusPosition(i, BasicAlleleInfo(*it));
+    }
+  }
+
+  // Second read : file data
+  grp_nbr = 0;
+  size_t grp_pos = 0;
+  loc_def_ok = false;
+  while (!is.eof())
+  {
+    temp = FileTools::getNextLine(is);
+    const string cp_temp = TextTools::toUpper(TextTools::removeSurroundingWhiteSpaces(temp));
+    if (cp_temp == string("POP"))
+    {
+      grp_nbr++;
+      loc_def_ok = true;
+      grp_pos = data_set.getGroupPosition(grp_nbr);
+      continue;
+    }
+    // Lines before the first POP are the locus names already handled in pass 1
+    if (!loc_def_ok)
+      continue;
+
+    string alleles;
+    StringTokenizer st(temp, string(","));
+    size_t ind_pos = 0;
+    if (st.numberOfRemainingTokens() == 2)
+    {
+      string ind_id = TextTools::removeSurroundingWhiteSpaces(st.nextToken());
+      // Ids seen several times in pass 1 get a running suffix to stay unique
+      if (ind_id_count[ind_id] > 1)
+        ind_id = ind_id + string("_") + TextTools::toString(++ind_id_index[ind_id]);
+      data_set.addEmptyIndividualToGroup(grp_pos, ind_id);
+      ind_pos = data_set.getIndividualPositionInGroup(grp_pos, ind_id);
+      data_set.initIndividualGenotypeInGroup(grp_pos, ind_pos);
+      alleles = st.nextToken();
+    }
+    StringTokenizer st2(alleles);
+    if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size())
+    {
+      size_t i = 0;
+      while (st2.hasMoreToken())
+      {
+        const string ids = TextTools::removeSurroundingWhiteSpaces(st2.nextToken());
+        const size_t half = ids.size() / 2;
+        vector<string> tmp_ids;
+        tmp_ids.push_back(ids.substr(0, half));
+        tmp_ids.push_back(ids.substr(half));
+        // Skip the locus as soon as one of the two alleles is missing
+        if (tmp_ids[0] != string("00") && tmp_ids[0] != string("000")
+            && tmp_ids[1] != string("00") && tmp_ids[1] != string("000"))
+        {
+          data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(grp_pos, ind_pos, i, tmp_ids);
+        }
+        i++;
+      }
+    }
+  }
+}
+
+/**
+ * Path-based overload: forwards to AbstractIDataSet::read(path, data_set).
+ * NOTE(review): the base class presumably opens the file and calls the
+ * stream overload - confirm in AbstractIDataSet.
+ */
+void Genepop::read(const string& path, DataSet& data_set) throw (Exception)
+{
+ AbstractIDataSet::read(path, data_set);
+}
+
+/**
+ * Stream overload returning a DataSet pointer: forwards to
+ * AbstractIDataSet::read(is) and returns its result unchanged.
+ */
+DataSet* Genepop::read(istream& is) throw (Exception)
+{
+ return AbstractIDataSet::read(is);
+}
+
+/**
+ * Path overload returning a DataSet pointer: forwards to
+ * AbstractIDataSet::read(path) and returns its result unchanged.
+ */
+DataSet* Genepop::read(const string& path) throw (Exception)
+{
+ return AbstractIDataSet::read(path);
+}
+
diff --git a/src/Bpp/PopGen/Genepop.h b/src/Bpp/PopGen/Genepop.h
new file mode 100644
index 0000000..43b9708
--- /dev/null
+++ b/src/Bpp/PopGen/Genepop.h
@@ -0,0 +1,99 @@
+//
+// File Genepop.h
+// Author : Sylvain Gaillard
+// Last modification : Tuesday September 21 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _GENEPOP_H_
+#define _GENEPOP_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+
+// From local Pop
+#include "AbstractIDataSet.h"
+#include "BasicAlleleInfo.h"
+
+namespace bpp
+{
+/**
+ * @brief The Genepop input format for popgenlib.
+ *
+ * @author Sylvain Gaillard
+ */
+class Genepop :
+ public AbstractIDataSet
+{
+public:
+ // Constructor and destructor
+ Genepop();
+ ~Genepop();
+
+public:
+ /**
+ * @name The IDataSet interface.
+ * @{
+ */
+ // Parses the stream and fills data_set; implemented in Genepop.cpp.
+ void read(std::istream& is, DataSet& data_set) throw (Exception);
+ // The three overloads below forward to the AbstractIDataSet implementations.
+ void read(const std::string& path, DataSet& data_set) throw (Exception);
+ DataSet* read(std::istream& is) throw (Exception);
+ DataSet* read(const std::string& path) throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The IOFormat interface
+ * @{
+ */
+ // Fixed, human-readable name of the format.
+ const std::string getFormatName() const
+ {
+ return "Genepop ver 3.4";
+ }
+
+ // Fixed, human-readable description of the format.
+ const std::string getFormatDescription() const
+ {
+ return "Genepop is a software for populations genetic for DOS operating system";
+ }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _GENEPOP_H_
diff --git a/src/Bpp/PopGen/GeneralExceptions.cpp b/src/Bpp/PopGen/GeneralExceptions.cpp
new file mode 100644
index 0000000..98b88ed
--- /dev/null
+++ b/src/Bpp/PopGen/GeneralExceptions.cpp
@@ -0,0 +1,220 @@
+//
+// File GeneralExceptions.cpp
+// Author : Sylvain Gaillard
+// Last modification: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "GeneralExceptions.h"
+
+#include <Bpp/Text/TextTools.h>
+
+using namespace bpp;
+using namespace std;
+
+// ** BadIdentifierException **************************************************/
+
+// Every constructor builds the Exception message as
+//   "BadIdentifierException: " + text + "(" + identifier + ")"
+// and keeps the identifier, converted to a string, in id_.
+
+// C-string message, numerical identifier.
+BadIdentifierException::BadIdentifierException(const char* text, const size_t id) :
+  Exception(string("BadIdentifierException: ") + text + "(" + TextTools::toString(id) + ")"),
+  id_(TextTools::toString(id))
+{}
+
+// std::string message, numerical identifier.
+BadIdentifierException::BadIdentifierException(const std::string& text, const size_t id) :
+  Exception(string("BadIdentifierException: ") + text + "(" + TextTools::toString(id) + ")"),
+  id_(TextTools::toString(id))
+{}
+
+// C-string message, textual identifier.
+BadIdentifierException::BadIdentifierException(const char* text, const std::string& id) :
+  Exception(string("BadIdentifierException: ") + text + "(" + id + ")"),
+  id_(id)
+{}
+
+// std::string message, textual identifier.
+BadIdentifierException::BadIdentifierException(const std::string& text, const std::string& id) :
+  Exception(string("BadIdentifierException: ") + text + "(" + id + ")"),
+  id_(id)
+{}
+
+// Nothing to release: id_ cleans up after itself.
+BadIdentifierException::~BadIdentifierException() throw ()
+{}
+
+// Returns a copy of the stored identifier string.
+const std::string BadIdentifierException::getIdentifier() const
+{
+  return std::string(id_);
+}
+
+// ** LocusNotFoundException **************************************************/
+
+// The base class BadIdentifierException already appends "(" + id + ")" to the
+// text it receives, so the constructors below only add the class prefix.
+// Appending the identifier here as well made it appear twice in the message.
+
+// C-string message, numerical identifier.
+LocusNotFoundException::LocusNotFoundException(const char* text, const size_t id) :
+  BadIdentifierException("LocusNotFoundException: " + string(text), id)
+{}
+
+// std::string message, numerical identifier.
+LocusNotFoundException::LocusNotFoundException(const std::string& text, const size_t id) :
+  BadIdentifierException("LocusNotFoundException: " + text, id)
+{}
+
+// C-string message, textual identifier.
+LocusNotFoundException::LocusNotFoundException(const char* text, const std::string& id) :
+  BadIdentifierException("LocusNotFoundException: " + string(text), id)
+{}
+
+// std::string message, textual identifier.
+LocusNotFoundException::LocusNotFoundException(const std::string& text, const std::string& id) :
+  BadIdentifierException("LocusNotFoundException: " + text, id)
+{}
+
+LocusNotFoundException::~LocusNotFoundException() throw () {}
+
+// Returns the identifier stored by the base class.
+const std::string LocusNotFoundException::getIdentifier() const
+{
+  return BadIdentifierException::getIdentifier();
+}
+
+// ** AlleleNotFoundException **************************************************/
+
+// The base class BadIdentifierException already appends "(" + id + ")" to the
+// text it receives, so the constructors below only add the class prefix.
+// Appending the identifier here as well made it appear twice in the message.
+
+// C-string message, numerical identifier.
+AlleleNotFoundException::AlleleNotFoundException(const char* text, const size_t id) :
+  BadIdentifierException("AlleleNotFoundException: " + string(text), id)
+{}
+
+// std::string message, numerical identifier.
+AlleleNotFoundException::AlleleNotFoundException(const std::string& text, const size_t id) :
+  BadIdentifierException("AlleleNotFoundException: " + text, id)
+{}
+
+// C-string message, textual identifier.
+AlleleNotFoundException::AlleleNotFoundException(const char* text, const std::string& id) :
+  BadIdentifierException("AlleleNotFoundException: " + string(text), id)
+{}
+
+// std::string message, textual identifier.
+AlleleNotFoundException::AlleleNotFoundException(const std::string& text, const std::string& id) :
+  BadIdentifierException("AlleleNotFoundException: " + text, id)
+{}
+
+AlleleNotFoundException::~AlleleNotFoundException() throw () {}
+
+// Returns the identifier stored by the base class.
+const std::string AlleleNotFoundException::getIdentifier() const
+{
+  return BadIdentifierException::getIdentifier();
+}
+
+// ** LocalityNotFoundException **************************************************/
+
+// The base class BadIdentifierException already appends "(" + id + ")" to the
+// text it receives, so the constructors below only add the class prefix.
+// Appending the identifier here as well made it appear twice in the message.
+
+// C-string message, numerical identifier.
+LocalityNotFoundException::LocalityNotFoundException(const char* text, const size_t id) :
+  BadIdentifierException("LocalityNotFoundException: " + string(text), id)
+{}
+
+// std::string message, numerical identifier.
+LocalityNotFoundException::LocalityNotFoundException(const std::string& text, const size_t id) :
+  BadIdentifierException("LocalityNotFoundException: " + text, id)
+{}
+
+// C-string message, textual identifier.
+LocalityNotFoundException::LocalityNotFoundException(const char* text, const std::string& id) :
+  BadIdentifierException("LocalityNotFoundException: " + string(text), id)
+{}
+
+// std::string message, textual identifier.
+LocalityNotFoundException::LocalityNotFoundException(const std::string& text, const std::string& id) :
+  BadIdentifierException("LocalityNotFoundException: " + text, id)
+{}
+
+LocalityNotFoundException::~LocalityNotFoundException() throw () {}
+
+// Returns the identifier stored by the base class.
+const std::string LocalityNotFoundException::getIdentifier() const
+{
+  return BadIdentifierException::getIdentifier();
+}
+
+// ** IndividualNotFoundException **************************************************/
+
+// The base class BadIdentifierException already appends "(" + id + ")" to the
+// text it receives, so the constructors below only add the class prefix.
+// Appending the identifier here as well made it appear twice in the message.
+
+// C-string message, numerical identifier.
+IndividualNotFoundException::IndividualNotFoundException(const char* text, const size_t id) :
+  BadIdentifierException("IndividualNotFoundException: " + string(text), id)
+{}
+
+// std::string message, numerical identifier.
+IndividualNotFoundException::IndividualNotFoundException(const std::string& text, const size_t id) :
+  BadIdentifierException("IndividualNotFoundException: " + text, id)
+{}
+
+// C-string message, textual identifier.
+IndividualNotFoundException::IndividualNotFoundException(const char* text, const std::string& id) :
+  BadIdentifierException("IndividualNotFoundException: " + string(text), id)
+{}
+
+// std::string message, textual identifier.
+IndividualNotFoundException::IndividualNotFoundException(const std::string& text, const std::string& id) :
+  BadIdentifierException("IndividualNotFoundException: " + text, id)
+{}
+
+IndividualNotFoundException::~IndividualNotFoundException() throw () {}
+
+// Returns the identifier stored by the base class.
+const std::string IndividualNotFoundException::getIdentifier() const
+{
+  return BadIdentifierException::getIdentifier();
+}
+
+// ** GroupNotFoundException **************************************************/
+
+// The base class BadIdentifierException already appends "(" + id + ")" to the
+// text it receives, so the constructors below only add the class prefix.
+// Appending the identifier here as well made it appear twice in the message.
+
+// C-string message, numerical identifier.
+GroupNotFoundException::GroupNotFoundException(const char* text, const size_t id) :
+  BadIdentifierException("GroupNotFoundException: " + string(text), id)
+{}
+
+// std::string message, numerical identifier.
+GroupNotFoundException::GroupNotFoundException(const std::string& text, const size_t id) :
+  BadIdentifierException("GroupNotFoundException: " + text, id)
+{}
+
+// C-string message, textual identifier.
+GroupNotFoundException::GroupNotFoundException(const char* text, const std::string& id) :
+  BadIdentifierException("GroupNotFoundException: " + string(text), id)
+{}
+
+// std::string message, textual identifier.
+GroupNotFoundException::GroupNotFoundException(const std::string& text, const std::string& id) :
+  BadIdentifierException("GroupNotFoundException: " + text, id)
+{}
+
+GroupNotFoundException::~GroupNotFoundException() throw () {}
+
+// Returns the identifier stored by the base class.
+const std::string GroupNotFoundException::getIdentifier() const
+{
+  return BadIdentifierException::getIdentifier();
+}
+
diff --git a/src/Bpp/PopGen/GeneralExceptions.h b/src/Bpp/PopGen/GeneralExceptions.h
new file mode 100644
index 0000000..3b2489a
--- /dev/null
+++ b/src/Bpp/PopGen/GeneralExceptions.h
@@ -0,0 +1,299 @@
+//
+// File GeneralExceptions.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _GENERALEXCEPTIONS_H_
+#define _GENERALEXCEPTIONS_H_
+
+// From STL
+#include <string>
+
+#include <Bpp/Exceptions.h>
+
+namespace bpp
+{
+// ****************************************************************************
+//
+/**
+ * @brief The BadIdentifierException class.
+ *
+ * This exception is used when an identifier is not found.
+ * The identifier can be either a string or an integer but its
+ * value is stored as a string.
+ *
+ * The message handed to the Exception base class is built as
+ * "BadIdentifierException: " + text + "(" + identifier + ")".
+ *
+ * @author Sylvain Gaillard
+ */
+class BadIdentifierException :
+ public Exception
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier; it is converted to a string for storage.
+ */
+ BadIdentifierException(const char* text, size_t id);
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier; it is converted to a string for storage.
+ */
+ BadIdentifierException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier, stored as given.
+ */
+ BadIdentifierException(const char* text, const std::string& id);
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier, stored as given.
+ */
+ BadIdentifierException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~BadIdentifierException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * @return A copy of the stored identifier.
+ */
+ virtual const std::string getIdentifier() const;
+
+protected:
+ // Identifier given at construction, always kept in string form.
+ const std::string id_;
+};
+
+// *****************************************************************************
+
+/**
+ * @brief The LocusNotFoundException class.
+ *
+ * A BadIdentifierException subclass offering the same four constructors;
+ * each one forwards its identifier to the base class. Judging by the name,
+ * it signals that a locus identifier could not be found.
+ */
+class LocusNotFoundException :
+ public BadIdentifierException
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier.
+ */
+ LocusNotFoundException(const char* text, size_t id);
+
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier.
+ */
+ LocusNotFoundException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier.
+ */
+ LocusNotFoundException(const char* text, const std::string& id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier.
+ */
+ LocusNotFoundException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~LocusNotFoundException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * The implementation delegates to BadIdentifierException::getIdentifier().
+ */
+ virtual const std::string getIdentifier() const;
+};
+
+// *****************************************************************************
+
+/**
+ * @brief The AlleleNotFoundException class.
+ *
+ * A BadIdentifierException subclass offering the same four constructors;
+ * each one forwards its identifier to the base class. Judging by the name,
+ * it signals that an allele identifier could not be found.
+ */
+class AlleleNotFoundException :
+ public BadIdentifierException
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier.
+ */
+ AlleleNotFoundException(const char* text, size_t id);
+
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier.
+ */
+ AlleleNotFoundException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier.
+ */
+ AlleleNotFoundException(const char* text, const std::string& id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier.
+ */
+ AlleleNotFoundException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~AlleleNotFoundException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * The implementation delegates to BadIdentifierException::getIdentifier().
+ */
+ virtual const std::string getIdentifier() const;
+};
+
+// *****************************************************************************
+
+/**
+ * @brief The LocalityNotFoundException class.
+ *
+ * A BadIdentifierException subclass offering the same four constructors;
+ * each one forwards its identifier to the base class. Judging by the name,
+ * it signals that a locality identifier could not be found.
+ */
+class LocalityNotFoundException :
+ public BadIdentifierException
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier.
+ */
+ LocalityNotFoundException(const char* text, size_t id);
+
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier.
+ */
+ LocalityNotFoundException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier.
+ */
+ LocalityNotFoundException(const char* text, const std::string& id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier.
+ */
+ LocalityNotFoundException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~LocalityNotFoundException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * The implementation delegates to BadIdentifierException::getIdentifier().
+ */
+ virtual const std::string getIdentifier() const;
+};
+
+// *****************************************************************************
+
+/**
+ * @brief The IndividualNotFoundException class.
+ *
+ * A BadIdentifierException subclass offering the same four constructors;
+ * each one forwards its identifier to the base class. Judging by the name,
+ * it signals that an individual identifier could not be found.
+ */
+class IndividualNotFoundException :
+ public BadIdentifierException
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier.
+ */
+ IndividualNotFoundException(const char* text, size_t id);
+
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier.
+ */
+ IndividualNotFoundException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier.
+ */
+ IndividualNotFoundException(const char* text, const std::string& id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier.
+ */
+ IndividualNotFoundException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~IndividualNotFoundException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * The implementation delegates to BadIdentifierException::getIdentifier().
+ */
+ virtual const std::string getIdentifier() const;
+};
+
+// *****************************************************************************
+
+/**
+ * @brief The GroupNotFoundException class.
+ *
+ * A BadIdentifierException subclass offering the same four constructors;
+ * each one forwards its identifier to the base class. Judging by the name,
+ * it signals that a group identifier could not be found.
+ */
+class GroupNotFoundException :
+ public BadIdentifierException
+{
+public:
+ // Class constructor
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The numerical identifier.
+ */
+ GroupNotFoundException(const char* text, size_t id);
+
+ /**
+ * @brief Build the exception with a numerical identifier.
+ *
+ * @param text The message text.
+ * @param id The numerical identifier.
+ */
+ GroupNotFoundException(const std::string& text, size_t id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text (C string).
+ * @param id The textual identifier.
+ */
+ GroupNotFoundException(const char* text, const std::string& id);
+
+ /**
+ * @brief Build the exception with a textual identifier.
+ *
+ * @param text The message text.
+ * @param id The textual identifier.
+ */
+ GroupNotFoundException(const std::string& text, const std::string& id);
+
+ // Class destructor
+ ~GroupNotFoundException() throw ();
+
+public:
+ /**
+ * @brief Return the value of the identifier as a string.
+ *
+ * The implementation delegates to BadIdentifierException::getIdentifier().
+ */
+ virtual const std::string getIdentifier() const;
+};
+} // end of namespace bpp;
+
+#endif // _GENERALEXCEPTIONS_H_
+
diff --git a/src/Bpp/PopGen/Genetix.cpp b/src/Bpp/PopGen/Genetix.cpp
new file mode 100644
index 0000000..4367630
--- /dev/null
+++ b/src/Bpp/PopGen/Genetix.cpp
@@ -0,0 +1,135 @@
+//
+// File Genetix.cpp
+// Authors : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : Monday August 02 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "Genetix.h"
+
+using namespace bpp;
+using namespace std;
+
+// The reader keeps no state of its own: construction has nothing to set up.
+Genetix::Genetix()
+{}
+
+// Likewise, destruction has nothing to release.
+Genetix::~Genetix()
+{}
+
+/*
+ * Fill data_set from a stream, reading in this order:
+ *   - one line holding the number of loci,
+ *   - one line holding the number of groups,
+ *   - for each locus: a name line, then a line "<allele count> <allele ids...>",
+ *   - for each group: a name line, a line holding the number of individuals,
+ *     then one line per individual. The first 11 characters of that line are
+ *     the individual's name; the rest is split on spaces into one token per
+ *     locus, each token being two 3-character allele ids side by side.
+ * A genotype is stored only when neither allele id is "000".
+ *
+ * Throws IOException when the stream is not readable, when a count cannot be
+ * parsed, or when a line or token is shorter than the fixed-width layout
+ * requires. Previously those cases read past the end of the string or used an
+ * unchecked count. Exceptions raised by the callees are propagated.
+ */
+void Genetix::read(istream& is, DataSet& data_set) throw (Exception)
+{
+  if (!is)
+    throw IOException("Genetix::read: fail to open stream.");
+
+  // Fixed-width layout of an individual line.
+  const string::size_type name_width = 11;
+  const string::size_type allele_width = 3;
+
+  // Loci number
+  string temp = FileTools::getNextLine(is);
+  unsigned int loc_nbr = 0;
+  if (!(stringstream(temp) >> loc_nbr))
+    throw IOException("Genetix::read: unable to read the number of loci.");
+  data_set.initAnalyzedLoci(loc_nbr);
+
+  // Groups number
+  temp = FileTools::getNextLine(is);
+  unsigned int grp_nbr = 0;
+  if (!(stringstream(temp) >> grp_nbr))
+    throw IOException("Genetix::read: unable to read the number of groups.");
+
+  // Loci data
+  for (unsigned int i = 0; i < loc_nbr; i++)
+  {
+    // Locus name
+    string name = FileTools::getNextLine(is);
+    name = TextTools::removeSurroundingWhiteSpaces(name);
+    LocusInfo tmp_loc(name);
+    // Alleles: "<count> <id> <id> ..."
+    stringstream values(FileTools::getNextLine(is));
+    unsigned int nbr_al = 0;
+    if (!(values >> nbr_al))
+      throw IOException("Genetix::read: unable to read the number of alleles of locus " + TextTools::toString(i + 1) + ".");
+    for (unsigned int j = 0; j < nbr_al; j++)
+    {
+      string al_id;
+      if (!(values >> al_id))
+        throw IOException("Genetix::read: missing allele identifier for locus " + TextTools::toString(i + 1) + ".");
+      BasicAlleleInfo tmp_al(al_id);
+      tmp_loc.addAlleleInfo(tmp_al);
+    }
+    data_set.setLocusInfo(i, tmp_loc);
+  }
+
+  // Groups
+  for (unsigned int i = 0; i < grp_nbr; i++)
+  {
+    data_set.addEmptyGroup(i);
+    // Group name
+    temp = FileTools::getNextLine(is);
+    data_set.setGroupName(i, temp);
+
+    // Number of individuals
+    unsigned int ind_nbr = 0;
+    temp = FileTools::getNextLine(is);
+    if (!(stringstream(temp) >> ind_nbr))
+      throw IOException("Genetix::read: unable to read the number of individuals of group " + TextTools::toString(i + 1) + ".");
+
+    for (unsigned int j = 0; j < ind_nbr; j++)
+    {
+      temp = FileTools::getNextLine(is);
+      if (temp.size() < name_width)
+        throw IOException("Genetix::read: line too short for individual " + TextTools::toString(j + 1) + " of group " + TextTools::toString(i + 1) + ".");
+      string ind_name = temp.substr(0, name_width);
+      temp = temp.substr(name_width);
+      // The stored id is "<name>_<group number>_<individual number>", both 1-based.
+      data_set.addEmptyIndividualToGroup(i, TextTools::removeSurroundingWhiteSpaces(ind_name) + string("_") + TextTools::toString(i + 1) + string("_") + TextTools::toString(j + 1));
+      data_set.initIndividualGenotypeInGroup(i, j);
+      StringTokenizer alleles(temp, string(" "));
+      for (unsigned int k = 0; k < loc_nbr; k++)
+      {
+        string tmp_string = alleles.nextToken();
+        if (tmp_string.size() < 2 * allele_width)
+          throw IOException("Genetix::read: genotype token too short for individual " + TextTools::toString(j + 1) + " of group " + TextTools::toString(i + 1) + ".");
+        vector<string> tmp_alleles;
+        tmp_alleles.push_back(tmp_string.substr(0, allele_width));
+        tmp_alleles.push_back(tmp_string.substr(allele_width, allele_width));
+        // The genotype is skipped as soon as one of the two allele ids is "000".
+        if (tmp_alleles[0] != string("000") && tmp_alleles[1] != string("000"))
+          data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(i, j, k, tmp_alleles);
+      }
+    }
+  }
+}
+
+// Path-based overload: the work is done by the AbstractIDataSet implementation.
+void Genetix::read(const string& path, DataSet& data_set) throw (Exception)
+{
+  this->AbstractIDataSet::read(path, data_set);
+}
+
+// Stream overload returning a DataSet pointer; forwards to AbstractIDataSet.
+DataSet* Genetix::read(istream& is) throw (Exception)
+{
+  DataSet* loaded = AbstractIDataSet::read(is);
+  return loaded;
+}
+
+// Path overload returning a DataSet pointer; forwards to AbstractIDataSet.
+DataSet* Genetix::read(const string& path) throw (Exception)
+{
+  DataSet* loaded = AbstractIDataSet::read(path);
+  return loaded;
+}
+
diff --git a/src/Bpp/PopGen/Genetix.h b/src/Bpp/PopGen/Genetix.h
new file mode 100644
index 0000000..10540d5
--- /dev/null
+++ b/src/Bpp/PopGen/Genetix.h
@@ -0,0 +1,100 @@
+//
+// File Genetix.h
+// Author : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : Friday July 30 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _GENETIX_H_
+#define _GENETIX_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Io/FileTools.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+
+// From local Pop
+#include "AbstractIDataSet.h"
+#include "BasicAlleleInfo.h"
+
+namespace bpp
+{
+/**
+ * @brief The Genetix input format for popgenlib.
+ *
+ * Derives from AbstractIDataSet and declares the four read() overloads
+ * of the IDataSet interface plus the two IOFormat description getters.
+ *
+ * @author Sylvain Gaillard
+ */
+class Genetix :
+ public AbstractIDataSet
+{
+public:
+ // Constructor and destructor
+ Genetix();
+ ~Genetix();
+
+public:
+ /**
+ * @name The IDataSet interface.
+ * @{
+ */
+ // Parse a stream and fill the given DataSet.
+ void read(std::istream& is, DataSet& data_set) throw (Exception);
+ // Path variant; the implementation forwards to AbstractIDataSet::read(path, data_set).
+ void read(const std::string& path, DataSet& data_set) throw (Exception);
+ // Stream variant returning a DataSet pointer; forwards to AbstractIDataSet::read(is).
+ // NOTE(review): ownership of the returned pointer is not visible here - confirm.
+ DataSet* read(std::istream& is) throw (Exception);
+ // Path variant returning a DataSet pointer; forwards to AbstractIDataSet::read(path).
+ DataSet* read(const std::string& path) throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The IOFormat interface
+ * @{
+ */
+ /**
+ * @return The constant format name "Genetix ver 4.05".
+ */
+ const std::string getFormatName() const
+ {
+ return "Genetix ver 4.05";
+ }
+ /**
+ * @return A constant one-sentence description of the Genetix software.
+ */
+ const std::string getFormatDescription() const
+ {
+ return "Genetix is a software for populations genetic for Windows(tm)";
+ }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _GENETIX_H_
+
diff --git a/src/Bpp/PopGen/Group.cpp b/src/Bpp/PopGen/Group.cpp
new file mode 100644
index 0000000..c76280c
--- /dev/null
+++ b/src/Bpp/PopGen/Group.cpp
@@ -0,0 +1,626 @@
+//
+// File Group.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "Group.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructors: ******************************************************/
+// Build an empty group: the given id, an empty name and no individual.
+Group::Group(size_t group_id) :
+  id_(group_id),
+  name_(),
+  individuals_()
+{}
+
+// Copy constructor: same id and name; every individual of the source group
+// is duplicated through addIndividual(), so the copy holds its own objects.
+Group::Group(const Group& group) :
+  id_(group.getGroupId()),
+  name_(group.getGroupName()),
+  individuals_()
+{
+  size_t pos = 0;
+  while (pos < group.getNumberOfIndividuals())
+  {
+    addIndividual(group.getIndividualAtPosition(pos));
+    ++pos;
+  }
+}
+
+// Copy a group under a new id: the name and a duplicate of every individual
+// come from the source group, the id is the one given as second argument.
+Group::Group(const Group& group, size_t group_id) :
+  id_(group_id),
+  name_(group.getGroupName()),
+  individuals_()
+{
+  size_t pos = 0;
+  while (pos < group.getNumberOfIndividuals())
+  {
+    addIndividual(group.getIndividualAtPosition(pos));
+    ++pos;
+  }
+}
+
+// ** Class destructor: ********************************************************/
+
+// The group owns the Individual objects stored in individuals_: they are
+// created with new in addIndividual()/addEmptyIndividual() and only leave the
+// group through removeIndividual*(), which hands them over to an auto_ptr.
+// Release whatever is still held, otherwise every destroyed group leaks them.
+Group::~Group()
+{
+  clear();
+}
+
+// ** Other methods: ***********************************************************/
+
+// Assignment: make this group an independent copy of the given one.
+// Existing individuals are deleted first, then the id, the name and a
+// duplicate of every individual are taken from the source. Previously the
+// source individuals were appended to the existing ones and the name was
+// left untouched. Self-assignment is a no-op.
+Group& Group::operator=(const Group& group)
+{
+  if (this == &group)
+    return *this;
+
+  clear();
+  setGroupId(group.getGroupId());
+  setGroupName(group.getGroupName());
+  for (size_t i = 0; i < group.getNumberOfIndividuals(); i++)
+  {
+    addIndividual(group.getIndividualAtPosition(i));
+  }
+  return *this;
+}
+
+// Replace the identifier of the group.
+void Group::setGroupId(size_t group_id)
+{
+  this->id_ = group_id;
+}
+
+// Replace the name of the group.
+void Group::setGroupName(const std::string& group_name)
+{
+  this->name_ = group_name;
+}
+
+// Append a copy of the given individual to the group.
+//
+// Throws BadIdentifierException when an individual with the same id is
+// already stored. The previous code threw that exception inside a try block
+// whose handler caught BadIdentifierException, so the duplicate was silently
+// accepted. The check is now a plain scan, as in addEmptyIndividual().
+void Group::addIndividual(const Individual& ind) throw (BadIdentifierException)
+{
+  for (size_t i = 0; i < getNumberOfIndividuals(); i++)
+  {
+    if (individuals_[i]->getId() == ind.getId())
+      throw BadIdentifierException("Group::addIndividual: individual id already used.", ind.getId());
+  }
+  individuals_.push_back(new Individual(ind));
+}
+
+// Append a new Individual built from the given id alone.
+// Throws BadIdentifierException when the id is already used in the group.
+void Group::addEmptyIndividual(const std::string& individual_id) throw (BadIdentifierException)
+{
+  size_t pos = 0;
+  while (pos < getNumberOfIndividuals())
+  {
+    const bool already_used = (individuals_[pos]->getId() == individual_id);
+    if (already_used)
+      throw BadIdentifierException("Group::addEmptyIndividual: individual_id already in use.", individual_id);
+    ++pos;
+  }
+  individuals_.push_back(new Individual(individual_id));
+}
+
+// Return the position of the first individual whose id equals individual_id.
+// Throws IndividualNotFoundException when no individual matches.
+size_t Group::getIndividualPosition(const std::string& individual_id) const throw (IndividualNotFoundException)
+{
+  size_t pos = 0;
+  while (pos < getNumberOfIndividuals())
+  {
+    if (individuals_[pos]->getId() == individual_id)
+      return pos;
+    ++pos;
+  }
+  throw IndividualNotFoundException("Group::getIndividualPosition: individual_id not found.", individual_id);
+}
+
+// Detach the individual with the given id from the group and hand it over
+// to the caller through an auto_ptr; the object itself is not deleted here.
+// Throws IndividualNotFoundException (with this method's own message) when
+// the id is unknown.
+std::auto_ptr<Individual> Group::removeIndividualById(const std::string& individual_id) throw (IndividualNotFoundException)
+{
+  size_t position = 0;
+  try
+  {
+    position = getIndividualPosition(individual_id);
+  }
+  catch (IndividualNotFoundException&)
+  {
+    // Re-throw so the message names this method instead of the lookup helper.
+    throw IndividualNotFoundException("Group::removeIndividualById: individual_id not found.", individual_id);
+  }
+
+  auto_ptr<Individual> detached(individuals_[position]);
+  individuals_.erase(individuals_.begin() + position);
+  return detached;
+}
+
+// Detach the individual stored at the given position and hand it over to
+// the caller through an auto_ptr.
+// Throws IndexOutOfBoundsException when the position is not a valid index.
+std::auto_ptr<Individual> Group::removeIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException)
+{
+  const bool in_range = individual_position < individuals_.size();
+  if (!in_range)
+    throw IndexOutOfBoundsException("Group::removeIndividualAtPosition.", individual_position, 0, individuals_.size());
+
+  auto_ptr<Individual> detached(individuals_[individual_position]);
+  individuals_.erase(individuals_.begin() + individual_position);
+  return detached;
+}
+
+// Remove the individual with the given id and destroy it.
+// Throws IndividualNotFoundException (with this method's own message) when
+// the id is unknown.
+void Group::deleteIndividualById(const std::string& individual_id) throw (IndividualNotFoundException)
+{
+  try
+  {
+    // The auto_ptr deletes the detached individual when it goes out of scope.
+    std::auto_ptr<Individual> discarded(removeIndividualById(individual_id));
+  }
+  catch (IndividualNotFoundException&)
+  {
+    throw IndividualNotFoundException("Group::deleteIndividualById: individual_id not found.", individual_id);
+  }
+}
+
+// Remove the individual stored at the given position and destroy it.
+// Throws IndexOutOfBoundsException (with this method's own message) when the
+// position is not a valid index.
+void Group::deleteIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException)
+{
+  try
+  {
+    // The auto_ptr deletes the detached individual when it goes out of scope.
+    std::auto_ptr<Individual> discarded(removeIndividualAtPosition(individual_position));
+  }
+  catch (IndexOutOfBoundsException&)
+  {
+    throw IndexOutOfBoundsException("Group::deleteIndividualAtPosition.", individual_position, 0, getNumberOfIndividuals());
+  }
+}
+
+// Delete every individual held by the group, then empty the container.
+void Group::clear()
+{
+  size_t pos = 0;
+  while (pos < individuals_.size())
+  {
+    delete individuals_[pos];
+    ++pos;
+  }
+  individuals_.clear();
+}
+
+// Return a const reference to the first individual whose id equals
+// individual_id. Throws IndividualNotFoundException when none matches.
+const Individual& Group::getIndividualById(const std::string& individual_id) const throw (IndividualNotFoundException)
+{
+  size_t pos = 0;
+  while (pos < individuals_.size())
+  {
+    if (individuals_[pos]->getId() == individual_id)
+      return getIndividualAtPosition(pos);
+    ++pos;
+  }
+  throw IndividualNotFoundException("Group::getIndividualById: individual_id not found.", individual_id);
+}
+
+// Return a const reference to the individual stored at the given position.
+// Throws IndexOutOfBoundsException when the position is not a valid index.
+const Individual& Group::getIndividualAtPosition(size_t individual_position) const
+throw (IndexOutOfBoundsException)
+{
+  if (individual_position < individuals_.size())
+    return *individuals_[individual_position];
+
+  throw IndexOutOfBoundsException("Group::getIndividualAtPosition: individual_position out of bounds.", individual_position, 0, individuals_.size());
+}
+
+// Number of individuals currently stored in the group.
+size_t Group::getNumberOfIndividuals() const
+{
+  const size_t count = individuals_.size();
+  return count;
+}
+
+// Return the highest sequence position reported by any individual, plus one.
+// NOTE(review): when no individual reports a position the result is 1, not 0;
+// confirm that callers expect this.
+size_t Group::getMaxNumberOfSequences() const
+{
+  size_t highest = 0;
+  for (size_t ind = 0; ind < getNumberOfIndividuals(); ++ind)
+  {
+    vector<size_t> positions = individuals_[ind]->getSequencesPositions();
+    for (vector<size_t>::const_iterator it = positions.begin(); it != positions.end(); ++it)
+    {
+      if (*it > highest)
+        highest = *it;
+    }
+  }
+  return highest + 1;
+}
+
+// -- Dealing with individual's properties -----------------
+
+// Set the sex of the individual stored at the given position.
+// Throws IndexOutOfBoundsException when the position is not a valid index.
+void Group::setIndividualSexAtPosition(size_t individual_position, const unsigned short sex) throw (IndexOutOfBoundsException)
+{
+  if (individual_position < getNumberOfIndividuals())
+  {
+    individuals_[individual_position]->setSex(sex);
+    return;
+  }
+  throw IndexOutOfBoundsException("Group::setIndividualSexAtPosition: individual_position out of bounds.", individual_position, 0, getNumberOfIndividuals());
+}
+
+// Return the sex code of the individual at individual_position.
+// Throws IndexOutOfBoundsException when the position is outside the group.
+unsigned short Group::getIndividualSexAtPosition(size_t individual_position) const throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position < count)
+    return individuals_[individual_position]->getSex();
+  throw IndexOutOfBoundsException("Group::getIndividualSexAtPosition: individual_position out of bounds.", individual_position, 0, count);
+}
+
+// Forward the date to the individual at individual_position after checking
+// that the position lies inside the group.
+void Group::setIndividualDateAtPosition(size_t individual_position, const Date& date) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualDateAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->setDate(date);
+}
+
+// Return the date of the individual at individual_position.
+// A NullPointerException raised by the individual is replaced by one whose
+// message names this method.
+const Date& Group::getIndividualDateAtPosition(size_t individual_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualDateAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    return individuals_[individual_position]->getDate();
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualDateAtPosition: individual has no date.");
+  }
+}
+
+// Forward the coordinates to the individual at individual_position after
+// checking that the position lies inside the group.
+void Group::setIndividualCoordAtPosition(size_t individual_position, const Point2D<double>& coord) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualCoordAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->setCoord(coord);
+}
+
+// Return the coordinates of the individual at individual_position.
+// A NullPointerException raised by the individual is replaced by one whose
+// message names this method.
+const Point2D<double>& Group::getIndividualCoordAtPosition(size_t individual_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualCoordAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    return individuals_[individual_position]->getCoord();
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualCoordAtPosition: individual has no coordinates.");
+  }
+}
+
+// Forward the locality pointer to the individual at individual_position
+// after checking that the position lies inside the group.
+void Group::setIndividualLocalityAtPosition(size_t individual_position, const Locality<double>* locality) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualLocalityAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->setLocality(locality);
+}
+
+// Return (by reference) the locality that the individual at
+// individual_position points to. A NullPointerException raised by the
+// individual is replaced by one whose message names this method.
+const Locality<double>& Group::getIndividualLocalityAtPosition(size_t individual_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualLocalityAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    // getLocality() yields a pointer; it is dereferenced here.
+    return *(individuals_[individual_position]->getLocality());
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualLocalityAtPosition: individuals has no locality.");
+  }
+}
+
+// Add a sequence at sequence_position to the individual at
+// individual_position. Alphabet, identifier and position conflicts reported
+// by the individual are re-thrown as the same exception type with a message
+// naming this method.
+void Group::addIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position, const Sequence& sequence) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::addIndividualSequenceAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    individuals_[individual_position]->addSequence(sequence_position, sequence);
+  } catch (AlphabetMismatchException& mismatch) {
+    throw AlphabetMismatchException("Group::addIndividualSequenceAtPosition: sequence's alphabet doesn't match.", mismatch.getAlphabets()[0], mismatch.getAlphabets()[1]);
+  } catch (BadIdentifierException& duplicateName) {
+    throw BadIdentifierException("Group::addIndividualSequenceAtPosition: sequence's name already in use.", duplicateName.getIdentifier());
+  } catch (BadIntegerException& duplicatePosition) {
+    throw BadIntegerException("Group::addIndividualSequenceAtPosition: sequence_position already in use.", duplicatePosition.getBadInteger());
+  }
+}
+
+// Return the sequence called sequence_name from the individual at
+// individual_position. NullPointerException and SequenceNotFoundException
+// raised by the individual are re-thrown with a message naming this method.
+const Sequence& Group::getIndividualSequenceByName(size_t individual_position, const string& sequence_name) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualSequenceByName: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    return individuals_[individual_position]->getSequenceByName(sequence_name);
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualSequenceByName: no sequence data in individual.");
+  } catch (SequenceNotFoundException& notFound) {
+    throw SequenceNotFoundException("Group::getIndividualSequenceByName: sequence_name not found.", notFound.getSequenceId());
+  }
+}
+
+// Return the sequence stored at sequence_position in the individual at
+// individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. NullPointerException and SequenceNotFoundException raised by the
+// individual are re-thrown with a message naming this method.
+const Sequence& Group::getIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+  {
+    // The message names this method so that the failure can be traced to the
+    // right caller (it is distinct from Group::getIndividualAtPosition).
+    throw IndexOutOfBoundsException("Group::getIndividualSequenceAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  }
+  try
+  {
+    return individuals_[individual_position]->getSequenceAtPosition(sequence_position);
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::getIndividualSequenceAtPosition: no sequence data in individual.");
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("Group::getIndividualSequenceAtPosition: sequence_position not found.", snfe.getSequenceId());
+  }
+}
+
+// Delete the sequence called sequence_name from the individual at
+// individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. NullPointerException and SequenceNotFoundException raised by the
+// individual are re-thrown with a message naming this method.
+void Group::deleteIndividualSequenceByName(size_t individual_position, const string& sequence_name) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::deleteIndividualSequenceByName: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    individuals_[individual_position]->deleteSequenceByName(sequence_name);
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::deleteIndividualSequenceByName: no sequence data in individual.");
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("Group::deleteIndividualSequenceByName: sequence_name not found.", snfe.getSequenceId());
+  }
+}
+
+// Delete the sequence stored at sequence_position from the individual at
+// individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. NullPointerException and SequenceNotFoundException raised by the
+// individual are re-thrown with a message naming this method.
+void Group::deleteIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::deleteIndividualSequenceAtPosition: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    individuals_[individual_position]->deleteSequenceAtPosition(sequence_position);
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::deleteIndividualSequenceAtPosition: no sequence data in individual.");
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("Group::deleteIndividualSequenceAtPosition: sequence_position not found.", snfe.getSequenceId());
+  }
+}
+
+// Report whether the individual at individual_position has sequences.
+// Throws IndexOutOfBoundsException when the position is outside the group.
+bool Group::hasIndividualSequences(size_t individual_position) const throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position < count)
+    return individuals_[individual_position]->hasSequences();
+  throw IndexOutOfBoundsException("Group::hasIndividualSequences: individual_position out of bounds.", individual_position, 0, count);
+}
+
+// Return the names of the sequences held by the individual at
+// individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. A NullPointerException raised by the individual is re-thrown with a
+// message naming this method.
+vector<string> Group::getIndividualSequencesNames(size_t individual_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualSequencesNames: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    return individuals_[individual_position]->getSequencesNames();
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::getIndividualSequencesNames: no sequence data in individual.");
+  }
+}
+
+// Return the position of the sequence called sequence_name inside the
+// individual at individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. NullPointerException and SequenceNotFoundException raised by the
+// individual are re-thrown with a message naming this method.
+size_t Group::getIndividualSequencePosition(size_t individual_position, const string& sequence_name) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualSequencePosition: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    return individuals_[individual_position]->getSequencePosition(sequence_name);
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::getIndividualSequencePosition: no sequence data in individual.");
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("Group::getIndividualSequencePosition: sequence_name not found.", snfe.getSequenceId());
+  }
+}
+
+// Return how many sequences the individual at individual_position holds.
+// A NullPointerException raised by the individual is replaced by one whose
+// message names this method.
+size_t Group::getIndividualNumberOfSequences(size_t individual_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualNumberOfSequences: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    return individuals_[individual_position]->getNumberOfSequences();
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualNumberOfSequences: no sequence data in individual.");
+  }
+}
+
+// Hand the sequence container msc to the individual at individual_position
+// after checking that the position lies inside the group.
+void Group::setIndividualSequences(size_t individual_position, const MapSequenceContainer& msc) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualSequences: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->setSequences(msc);
+}
+
+// Hand the multilocus genotype to the individual at individual_position
+// after checking that the position lies inside the group.
+void Group::setIndividualGenotype(size_t individual_position, const MultilocusGenotype& genotype) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualGenotype: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->setGenotype(genotype);
+}
+
+// Initialise the genotype of the individual at individual_position with
+// loci_number loci.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. A BadIntegerException raised by the individual is re-thrown with a
+// message naming this method; any other Exception raised by the individual
+// is reported as "individual already has a genotype".
+void Group::initIndividualGenotype(size_t individual_position, size_t loci_number) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::initIndividualGenotype: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    individuals_[individual_position]->initGenotype(loci_number);
+  }
+  catch (BadIntegerException& bie)
+  {
+    throw BadIntegerException("Group::initIndividualGenotype: loci_number must be > 0.", bie.getBadInteger());
+  }
+  catch (Exception&)
+  {
+    // Caught by reference: catching by value would copy (and slice) the
+    // in-flight exception object for no benefit.
+    throw Exception("Group::initIndividualGenotype: individual already has a genotype.");
+  }
+}
+
+// Ask the individual at individual_position to delete its genotype, after
+// checking that the position lies inside the group.
+void Group::deleteIndividualGenotype(size_t individual_position) throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::deleteIndividualGenotype: individual_position out of bounds.", individual_position, 0, count);
+  Individual* target = individuals_[individual_position];
+  target->deleteGenotype();
+}
+
+// Report whether the individual at individual_position has a genotype.
+// Throws IndexOutOfBoundsException when the position is outside the group.
+bool Group::hasIndividualGenotype(size_t individual_position) const throw (IndexOutOfBoundsException)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position < count)
+    return individuals_[individual_position]->hasGenotype();
+  throw IndexOutOfBoundsException("Group::hasIndividualGenotype: individual_position out of bounds.", individual_position, 0, count);
+}
+
+// Set the monolocus genotype at locus_position for the individual at
+// individual_position. NullPointerException and IndexOutOfBoundsException
+// raised by the individual are re-thrown with a message naming this method.
+void Group::setIndividualMonolocusGenotype(size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotype: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    individuals_[individual_position]->setMonolocusGenotype(locus_position, monogen);
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::setIndividualMonolocusGenotype: individual has no genotype.");
+  } catch (IndexOutOfBoundsException& badLocus) {
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotype: locus_position excedes the number of locus.", badLocus.getBadIndex(), badLocus.getBounds()[0], badLocus.getBounds()[1]);
+  }
+}
+
+// Set the monolocus genotype at locus_position, described by allele_keys,
+// for the individual at individual_position.
+//
+// Throws IndexOutOfBoundsException when individual_position is outside the
+// group. NullPointerException and IndexOutOfBoundsException raised by the
+// individual are re-thrown with a message naming this method; any other
+// Exception raised by the individual is reported as "no key in allele_keys".
+void Group::setIndividualMonolocusGenotypeByAlleleKey(size_t individual_position, size_t locus_position, const std::vector<size_t>& allele_keys) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleKey: individual_position out of bounds.", individual_position, 0, count);
+  try
+  {
+    individuals_[individual_position]->setMonolocusGenotypeByAlleleKey(locus_position, allele_keys);
+  }
+  catch (NullPointerException&)
+  {
+    throw NullPointerException("Group::setIndividualMonolocusGenotypeByAlleleKey: individual has no genotype.");
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleKey: locus_position excedes the number of locus.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  catch (Exception&)
+  {
+    // Caught by reference: catching by value would copy (and slice) the
+    // in-flight exception object for no benefit.
+    throw Exception("Group::setIndividualMonolocusGenotypeByAlleleKey: no key in allele_keys.");
+  }
+}
+
+// Set the monolocus genotype at locus_position, described by allele ids
+// resolved through locus_info, for the individual at individual_position.
+// NullPointerException, IndexOutOfBoundsException and AlleleNotFoundException
+// raised by the individual are re-thrown with a message naming this method.
+void Group::setIndividualMonolocusGenotypeByAlleleId(size_t individual_position, size_t locus_position, const std::vector<std::string>& allele_id, const LocusInfo& locus_info) throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleId: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    individuals_[individual_position]->setMonolocusGenotypeByAlleleId(locus_position, allele_id, locus_info);
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::setIndividualMonolocusGenotypeByAlleleId: individual has no genotype.");
+  } catch (IndexOutOfBoundsException& badLocus) {
+    throw IndexOutOfBoundsException("Group::setIndividualMonolocusGenotypeByAlleleId: locus_position excedes the number of locus.", badLocus.getBadIndex(), badLocus.getBounds()[0], badLocus.getBounds()[1]);
+  } catch (AlleleNotFoundException& unknownAllele) {
+    throw AlleleNotFoundException("Group::setIndividualMonolocusGenotypeByAlleleId: id not found.", unknownAllele.getIdentifier());
+  }
+}
+
+// Return the monolocus genotype at locus_position of the individual at
+// individual_position. NullPointerException and IndexOutOfBoundsException
+// raised by the individual are re-thrown with a message naming this method.
+const MonolocusGenotype& Group::getIndividualMonolocusGenotype(size_t individual_position, size_t locus_position) const throw (Exception)
+{
+  const size_t count = getNumberOfIndividuals();
+  if (individual_position >= count)
+    throw IndexOutOfBoundsException("Group::getIndividualMonolocusGenotype: individual_position out of bounds.", individual_position, 0, count);
+  try {
+    return individuals_[individual_position]->getMonolocusGenotype(locus_position);
+  } catch (NullPointerException&) {
+    throw NullPointerException("Group::getIndividualMonolocusGenotype: individual has no genotype.");
+  } catch (IndexOutOfBoundsException& badLocus) {
+    throw IndexOutOfBoundsException("Group::getIndividualMonolocusGenotype: locus_position excedes the number of locus.", badLocus.getBadIndex(), badLocus.getBounds()[0], badLocus.getBounds()[1]);
+  }
+}
+
+// True as soon as one individual of the group reports having sequences;
+// false when none does (including when the group is empty).
+bool Group::hasSequenceData() const
+{
+  const size_t count = getNumberOfIndividuals();
+  size_t pos = 0;
+  // Advance until an individual with sequences is met or the end is reached.
+  while (pos < count && !hasIndividualSequences(pos))
+    ++pos;
+  return pos < count;
+}
+
+// Return the sequence alphabet of the first individual that has sequences.
+// Throws NullPointerException when no individual in the group has any.
+const Alphabet* Group::getAlphabet() const throw (NullPointerException)
+{
+  const size_t count = getNumberOfIndividuals();
+  size_t pos = 0;
+  while (pos < count)
+  {
+    if (hasIndividualSequences(pos))
+      return individuals_[pos]->getSequenceAlphabet();
+    ++pos;
+  }
+  throw NullPointerException("Group::getAlphabet: individual has no sequence data.");
+}
+
+// Count the individuals that have a genotype whose monolocus genotype at
+// locus_position is not reported as missing.
+size_t Group::getGroupSizeForLocus(size_t locus_position) const
+{
+  size_t total = 0;
+  for (std::vector<Individual*>::const_iterator it = individuals_.begin(); it != individuals_.end(); ++it)
+  {
+    Individual* ind = *it;
+    // Individuals without any genotype are never queried for the locus.
+    if (!ind->hasGenotype())
+      continue;
+    if (ind->getGenotype().isMonolocusGenotypeMissing(locus_position))
+      continue;
+    ++total;
+  }
+  return total;
+}
+
+// Count the individuals for which getSequenceAtPosition(sequence_position)
+// succeeds. Individuals without sequences are skipped, and any exception
+// raised by the lookup means the individual is not counted.
+size_t Group::getGroupSizeForSequence(size_t sequence_position) const
+{
+  size_t total = 0;
+  for (std::vector<Individual*>::const_iterator it = individuals_.begin(); it != individuals_.end(); ++it)
+  {
+    Individual* ind = *it;
+    if (!ind->hasSequences())
+      continue;
+    try {
+      ind->getSequenceAtPosition(sequence_position);
+      ++total;
+    } catch (...) {
+      // Deliberately ignored: the lookup is used only as an existence test.
+    }
+  }
+  return total;
+}
+
diff --git a/src/Bpp/PopGen/Group.h b/src/Bpp/PopGen/Group.h
new file mode 100644
index 0000000..bddcc93
--- /dev/null
+++ b/src/Bpp/PopGen/Group.h
@@ -0,0 +1,506 @@
+//
+// File Group.h
+// Author : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _GROUP_H_
+#define _GROUP_H_
+
+// From STL
+#include <vector>
+#include <memory>
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Graphics/Point2D.h>
+
+// From SeqLib
+#include <Bpp/Seq/Container/VectorSequenceContainer.h>
+#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SequenceContainerTools.h>
+
+// From local
+#include "Individual.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief The Group class.
+ *
+ * A Group is an ensemble of Individuals with some statistics like the average
+ * allele number.
+ *
+ * @author Sylvain Gaillard
+ */
+class Group
+{
+protected:
+ size_t id_;
+ std::string name_;
+ std::vector<Individual*> individuals_;
+
+public:
+ // Constructors and destructor :
+ /**
+ * @brief Build a new empty Group.
+ */
+ Group(size_t group_id);
+
+ /**
+ * @brief Copy constructor.
+ *
+ * If you need to use a copy constructor in a DataSet context, use the one
+ * which specifies a new Group Id.
+ */
+ Group(const Group& group);
+
+ /**
+ * @brief A duplication constructor with new Group Id.
+ */
+ Group(const Group& group, size_t group_id);
+
+ /**
+ * @brief Destroy a Group.
+ */
+ ~Group();
+
+public:
+ /**
+ * @brief The assignment operator =.
+ */
+ Group& operator=(const Group& group);
+
+ /**
+ * @brief Set the id of the Group.
+ *
+ * @param group_id The id of the Group as a size_t.
+ */
+ void setGroupId(size_t group_id);
+
+ /**
+ * @brief Get the name of the Group.
+ *
+ * @return The name of the Group as a string.
+ */
+ const std::string& getGroupName() const { return name_; }
+
+ /**
+ * @brief Set the name of the Group.
+ *
+ * @param group_name Name of the Group as string.
+ */
+ void setGroupName(const std::string& group_name);
+
+ /**
+ * @brief Get the id of the Group.
+ *
+ * @return The id of the Group as a size_t.
+ */
+ size_t getGroupId() const { return id_; }
+
+ /**
+ * @brief Add an Individual.
+ *
+ * Add an Individual to the group.
+ *
+ * @param ind The Individual to add to the Group.
+ * @throw BadIdentifierException if individual's identifier is already in use.
+ */
+ void addIndividual(const Individual& ind) throw (BadIdentifierException);
+
+ /**
+ * @brief Add an empty Individual to the Group.
+ *
+ * @throw BadIdentifierException if individual_id is already in use.
+ */
+ void addEmptyIndividual(const std::string& individual_id) throw (BadIdentifierException);
+
+ /**
+ * @brief Get the number of Individuals in the Group.
+ *
+ * @return The number of Individuals as a size_t.
+ */
+ size_t getNumberOfIndividuals() const;
+
+ /**
+ * @brief Get the maximum number of sequences.
+ *
+ * Give the value of the highest sequence key plus one. This value is useful to
+ * discover the missing sequences data for each individual.
+ */
+ size_t getMaxNumberOfSequences() const;
+
+ /**
+ * @brief Get the position of an Individual.
+ *
+ * @throw IndividualNotFoundException if individual_id is not found.
+ */
+ size_t getIndividualPosition(const std::string& individual_id) const
+ throw (IndividualNotFoundException);
+
+ /**
+ * @brief Get a reference to an Individual.
+ *
+ * @param individual_id The id of the Individual to find.
+ * @return A reference to the Individual.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ */
+ const Individual& getIndividualById(const std::string& individual_id) const throw (IndividualNotFoundException);
+
+ /**
+ * @brief Get a reference to an Individual by its position.
+ *
+ * @param individual_position The position of the Individual in the group.
+ *
+ * @return A reference to the Individual.
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ const Individual& getIndividualAtPosition(size_t individual_position) const
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Remove an Individual from the Group.
+ *
+ * @param individual_id The id of the Individual to remove.
+ *
+ * @return An std::auto_ptr to the removed Individual.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ *
+ * Search an Individual in the Group by checking the id and remove it
+ * if it is found then return a pointer to this Individual.
+ */
+ std::auto_ptr<Individual> removeIndividualById(const std::string& individual_id) throw (IndividualNotFoundException);
+
+ /**
+ * @brief Remove an Individual from the Group.
+ *
+ * @param individual_position The position in the Group of the Individual to remove.
+ *
+ * @return An std::auto_ptr to the removed Individual.
+ *
+ * Remove the individual at the specified position and return a pointer
+ * to this Individual.
+ */
+ std::auto_ptr<Individual> removeIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete an Individual from the Group.
+ *
+ * @param individual_id The id of the Individual to delete.
+ * @throw IndividualNotFoundException if individual_id is not found.
+ *
+ * Search an Individual in the Group by checking the id and delete it
+ * if it is found and free the memory by calling the destructor of the
+ * Individual.
+ */
+ void deleteIndividualById(const std::string& individual_id) throw (IndividualNotFoundException);
+
+ /**
+ * @brief Delete an Individual from the Group.
+ *
+ * @param individual_position The position in the Group of the Individual to delete.
+ *
+ * Free the memory by calling the destructor of the Individual.
+ */
+ void deleteIndividualAtPosition(size_t individual_position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Clear the Group.
+ *
+ * Delete all the Individuals of the group.
+ */
+ void clear();
+
+ // -- Dealing with Individuals -----------------------------
+ /**
+ * @brief Set the sex of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualSexAtPosition(size_t individual_position, const unsigned short sex)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the sex of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ unsigned short getIndividualSexAtPosition(size_t individual_position) const
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set the date of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualDateAtPosition(size_t individual_position, const Date& date)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the date of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the Individual has no date.
+ */
+ const Date& getIndividualDateAtPosition(size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Set the coordinates of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualCoordAtPosition(size_t individual_position, const Point2D<double>& coord)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the coordinates of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no coordinate.
+ */
+ const Point2D<double>& getIndividualCoordAtPosition(size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Set the locality of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualLocalityAtPosition(size_t individual_position, const Locality<double>* locality)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the locality of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no locality.
+ */
+ const Locality<double>& getIndividualLocalityAtPosition(size_t individual_position) const
+ throw (Exception);
+
+ /**
+ * @brief Add a sequence to an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
+ * @throw BadIdentifierException if the sequence's name is already in use.
+ * @throw BadIntegerException if sequence_position is already in use.
+ */
+ void addIndividualSequenceAtPosition(size_t individual_position,
+ size_t sequence_position, const Sequence& sequence)
+ throw (Exception);
+
+ /**
+ * @brief Get a sequence of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ const Sequence& getIndividualSequenceByName(size_t individual_position, const std::string& sequence_name) const
+ throw (Exception);
+
+ /**
+ * @brief Get a sequence of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ * @throw SequenceNotFoundException if sequence_position is not found.
+ */
+ const Sequence& getIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position) const
+ throw (Exception);
+
+ /**
+ * @brief Delete a sequence of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ void deleteIndividualSequenceByName(size_t individual_position, const std::string& sequence_name)
+ throw (Exception);
+
+ /**
+ * @brief Delete a sequence of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ * @throw SequenceNotFoundException if sequence_position is not found.
+ */
+ void deleteIndividualSequenceAtPosition(size_t individual_position, size_t sequence_position)
+ throw (Exception);
+
+ /**
+ * @brief Tell if the Individual has some sequences.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ bool hasIndividualSequences(size_t individual_position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the sequences' names from an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ */
+ std::vector<std::string> getIndividualSequencesNames(size_t individual_position) const throw (Exception);
+
+ /**
+ * @brief Get the position of a sequence in an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ size_t getIndividualSequencePosition(size_t individual_position, const std::string& sequence_name) const
+ throw (Exception);
+
+ /**
+ * @brief Get the number of sequences in an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if there is no sequence container defined in the individual.
+ */
+ size_t getIndividualNumberOfSequences(size_t individual_position) const throw (Exception);
+
+ /**
+ * @brief Set all the sequences by copying a MapSequenceContainer.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualSequences(size_t individual_position, const MapSequenceContainer& msc)
+ throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set the genotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void setIndividualGenotype(size_t individual_position, const MultilocusGenotype& genotype) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Initialize the genotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw BadIntegerException if loci_number < 1.
+ * @throw Exception if the individual already has a genotype.
+ */
+ void initIndividualGenotype(size_t individual_position, size_t loci_number)
+ throw (Exception);
+
+ /**
+ * @brief Delete the genotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ void deleteIndividualGenotype(size_t individual_position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Tell if an Individual has a genotype.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ */
+ bool hasIndividualGenotype(size_t individual_position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ */
+ void setIndividualMonolocusGenotype(size_t individual_position, size_t locus_position,
+ const MonolocusGenotype& monogen) throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ * @throw Exception if there is no key in allele_keys.
+ */
+ void setIndividualMonolocusGenotypeByAlleleKey(size_t individual_position, size_t locus_position,
+ const std::vector<size_t>& allele_keys) throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ * @throw AlleleNotFoundException if at least one id is not found in locus_info.
+ */
+ void setIndividualMonolocusGenotypeByAlleleId(size_t individual_position, size_t locus_position,
+ const std::vector<std::string>& allele_id, const LocusInfo& locus_info) throw (Exception);
+
+ /**
+ * @brief Get a MonolocusGenotype of an Individual.
+ *
+ * @throw IndexOutOfBoundsException if individual_position exceeds the number of individuals.
+ * @throw NullPointerException if the individual has no genotype.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ */
+ const MonolocusGenotype& getIndividualMonolocusGenotype(size_t individual_position,
+ size_t locus_position) const throw (Exception);
+
+ /**
+ * @brief Tell if at least one individual has at least one sequence.
+ */
+ bool hasSequenceData() const;
+
+ /**
+ * @brief Get the alphabet used for the sequences.
+ */
+ const Alphabet* getAlphabet() const throw (NullPointerException);
+
+ /**
+ * @brief Get the number of individuals that have data at the specified locus.
+ */
+ size_t getGroupSizeForLocus(size_t locus_position) const;
+
+ /**
+ * @brief Get the number of individuals that have a sequence at the specified position.
+ */
+ size_t getGroupSizeForSequence(size_t sequence_position) const;
+};
+} // end of namespace bpp;
+
+#endif // _GROUP_H_
+
diff --git a/src/Bpp/PopGen/IDataSet.h b/src/Bpp/PopGen/IDataSet.h
new file mode 100644
index 0000000..361a577
--- /dev/null
+++ b/src/Bpp/PopGen/IDataSet.h
@@ -0,0 +1,85 @@
+//
+// File IDataSet.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _IDATASET_H_
+#define _IDATASET_H_
+
+#include "IODataSet.h"
+
+#include <Bpp/Exceptions.h>
+
+namespace bpp
+{
+/**
+ * @brief The IDataSet interface.
+ *
+ * @author Sylvain Gaillard
+ */
+class IDataSet : public virtual IODataSet
+{
+public:
+  /**
+   * @brief Virtual destructor (empty).
+   */
+  virtual ~IDataSet() {}
+
+  /**
+   * @brief Read a DataSet from an input stream.
+   *
+   * @param is The input stream to read from.
+   * @param data_set The DataSet object passed by reference to the reader.
+   */
+  virtual void read(std::istream& is, DataSet& data_set) throw (Exception) = 0;
+
+  /**
+   * @brief Read a DataSet from a text file.
+   *
+   * @param path The path of the file to read.
+   * @param data_set The DataSet object passed by reference to the reader.
+   */
+  virtual void read(const std::string& path, DataSet& data_set) throw (Exception) = 0;
+
+  /**
+   * @brief Read an input stream and return a DataSet.
+   *
+   * @param is The input stream to read from.
+   * @return A pointer to a DataSet.
+   */
+  virtual DataSet* read(std::istream& is) throw (Exception) = 0;
+
+  /**
+   * @brief Read a text file and return a DataSet.
+   *
+   * @param path The path of the file to read.
+   * @return A pointer to a DataSet.
+   */
+  virtual DataSet* read(const std::string& path) throw (Exception) = 0;
+};
+} // end of namespace bpp;
+
+#endif // _IDATASET_H_
+
diff --git a/src/Bpp/PopGen/IODataSet.h b/src/Bpp/PopGen/IODataSet.h
new file mode 100644
index 0000000..388a4e9
--- /dev/null
+++ b/src/Bpp/PopGen/IODataSet.h
@@ -0,0 +1,75 @@
+//
+// File IODataSet.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+// Secured inclusion of header's file
+#ifndef _IODATASET_H_
+#define _IODATASET_H_
+
+#include "DataSet.h"
+
+#include <Bpp/Io/IoFormat.h>
+
+// From STL
+#include <iostream>
+#include <fstream>
+
+namespace bpp
+{
+/**
+ * @brief Interface for input/output with DataSet.
+ *
+ * IODataSet is a virtual class.
+ * This is an interface to declare common methods for input/output operations on DataSet.
+ *
+ * @author Sylvain Gaillard
+ */
+class IODataSet : public virtual IOFormat
+{
+// The access specifier is required: members of a `class` are private by
+// default, which would make getDataType() uncallable on an IODataSet object.
+public:
+  /**
+   * @name The IOFormat interface.
+   * @{
+   */
+
+  /**
+   * @brief Get a textual description of the kind of data handled.
+   *
+   * @return The string "DataSet for population genetics".
+   */
+  const std::string getDataType() const { return "DataSet for population genetics"; }
+  /**
+   * @}
+   */
+};
+} // end of namespace bpp;
+
+#endif // _IODATASET_H_
+
diff --git a/src/Bpp/PopGen/Individual.cpp b/src/Bpp/PopGen/Individual.cpp
new file mode 100644
index 0000000..101fa60
--- /dev/null
+++ b/src/Bpp/PopGen/Individual.cpp
@@ -0,0 +1,632 @@
+//
+// File Individual.cpp
+// Author : Sylvain Gaillard
+// Last modification : Tuesday August 03 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "Individual.h"
+
+using namespace bpp;
+
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+// Default constructor: empty id, sex code 0, no date, coordinates, locality,
+// sequences or genotype attached.
+Individual::Individual() :
+  id_(),
+  sex_(0),
+  date_(),
+  coord_(),
+  locality_(0),
+  sequences_(),
+  genotype_()
+{}
+
+// Build an Individual that only carries an identifier; sex code is 0 and
+// every optional attribute is left unset.
+Individual::Individual(const std::string& id) :
+  id_(id),
+  sex_(0),
+  date_(),
+  coord_(),
+  locality_(0),
+  sequences_(),
+  genotype_()
+{}
+
+// Build an Individual from its id, date, coordinates, locality and sex.
+// The date and the coordinates are copied; the locality pointer is stored
+// as given. No sequence and no genotype are attached.
+Individual::Individual(const string& id,
+                       const Date& date,
+                       const Point2D<double>& coord,
+                       Locality<double>* locality,
+                       const unsigned short sex) :
+  id_(id),
+  sex_(sex),
+  date_(),
+  coord_(),
+  locality_(locality),
+  sequences_(),
+  genotype_()
+{
+  date_.reset(new Date(date));
+  coord_.reset(new Point2D<double>(coord));
+}
+
+// Copy constructor.
+// Date, coordinates, sequences and genotype are deep-copied when present in
+// ind; the locality pointer is copied as is (it is never deleted by this
+// class). Presence is tested directly instead of calling the throwing getters
+// inside catch-all blocks, so that unrelated failures (e.g. std::bad_alloc)
+// are reported to the caller rather than silently producing a partial copy.
+Individual::Individual(const Individual& ind) :
+  id_(ind.id_),
+  sex_(ind.sex_),
+  date_(0),
+  coord_(0),
+  locality_(ind.locality_),
+  sequences_(0),
+  genotype_(0)
+{
+  if (ind.date_.get() != 0)
+    date_.reset(new Date(*ind.date_));
+  if (ind.coord_.get() != 0)
+    coord_.reset(new Point2D<double>(*ind.coord_));
+  // The container is copied even when it holds no sequence.
+  if (ind.sequences_.get() != 0)
+    sequences_.reset(new MapSequenceContainer(*ind.sequences_));
+  if (ind.genotype_.get() != 0)
+    genotype_.reset(new MultilocusGenotype(*ind.genotype_));
+}
+
+// ** Class destructor: *******************************************************/
+// Nothing to do explicitly: the destructor body is empty.
+Individual::~Individual()
+{}
+
+// ** Other methods: **********************************************************/
+
+// Copy assignment.
+// Every attribute of this Individual is replaced by a copy of the matching
+// attribute of ind; attributes that ind does not have are cleared here too.
+// Presence is tested directly rather than by catching (by value) the
+// NullPointerException thrown by the getters.
+Individual& Individual::operator=(const Individual& ind)
+{
+  id_ = ind.id_;
+  sex_ = ind.sex_;
+  // Each copy is allocated before reset() releases the previous object, so
+  // assigning an Individual to itself remains safe.
+  date_.reset(ind.date_.get() != 0 ? new Date(*ind.date_) : 0);
+  coord_.reset(ind.coord_.get() != 0 ? new Point2D<double>(*ind.coord_) : 0);
+  locality_ = ind.locality_;
+  sequences_.reset(ind.sequences_.get() != 0 ? new MapSequenceContainer(*ind.sequences_) : 0);
+  genotype_.reset(ind.genotype_.get() != 0 ? new MultilocusGenotype(*ind.genotype_) : 0);
+  return *this;
+}
+
+/******************************************************************************/
+
+// Id
+// Replace the identifier of this individual.
+void Individual::setId(const std::string& id)
+{
+  this->id_ = id;
+}
+
+/******************************************************************************/
+
+// Sex
+// Replace the sex code of this individual.
+void Individual::setSex(const unsigned short sex)
+{
+  this->sex_ = sex;
+}
+
+/******************************************************************************/
+
+// Date
+// Store a private copy of date, replacing any previously stored date.
+void Individual::setDate(const Date& date)
+{
+  std::auto_ptr<Date> copy(new Date(date));
+  date_ = copy; // ownership moves from copy to date_
+}
+
+/******************************************************************************/
+
+// Return the stored date.
+// Throws NullPointerException when no date has been set.
+const Date& Individual::getDate() const throw (NullPointerException)
+{
+  if (!hasDate())
+    throw (NullPointerException("Individual::getDate: no date associated to this individual."));
+  return *date_;
+}
+
+/******************************************************************************/
+
+// True when a date is currently stored.
+bool Individual::hasDate() const
+{
+  const Date* stored = date_.get();
+  return stored != 0;
+}
+
+/******************************************************************************/
+
+// Coord
+// Store a private copy of coord, replacing any previously stored coordinates.
+void Individual::setCoord(const Point2D<double>& coord)
+{
+  std::auto_ptr< Point2D<double> > copy(new Point2D<double>(coord));
+  coord_ = copy; // ownership moves from copy to coord_
+}
+
+/******************************************************************************/
+
+// Build new coordinates from x and y, replacing any previously stored ones.
+void Individual::setCoord(const double x, const double y)
+{
+  std::auto_ptr< Point2D<double> > fresh(new Point2D<double>(x, y));
+  coord_ = fresh; // ownership moves from fresh to coord_
+}
+
+/******************************************************************************/
+
+// Return the stored coordinates.
+// Throws NullPointerException when no coordinates have been set.
+const Point2D<double>& Individual::getCoord() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("Individual::getCoord: no coord associated to this individual."));
+  return *coord_;
+}
+
+/******************************************************************************/
+
+// True when coordinates are currently stored.
+bool Individual::hasCoord() const
+{
+  const Point2D<double>* stored = coord_.get();
+  return stored != 0;
+}
+
+/******************************************************************************/
+
+// Change the X coordinate of the stored coordinates.
+// Throws NullPointerException when no coordinates have been set.
+void Individual::setX(const double x) throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("Individual::setX: no coord associated to this individual."));
+  coord_->setX(x);
+}
+
+/******************************************************************************/
+
+// Change the Y coordinate of the stored coordinates.
+// Throws NullPointerException when no coordinates have been set.
+void Individual::setY(const double y) throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("Individual::setY: no coord associated to this individual."));
+  coord_->setY(y);
+}
+
+/******************************************************************************/
+
+// Return the X coordinate of the stored coordinates.
+// Throws NullPointerException when no coordinates have been set.
+double Individual::getX() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("Individual::getX: no coord associated to this individual."));
+  return coord_->getX();
+}
+
+/******************************************************************************/
+
+// Return the Y coordinate of the stored coordinates.
+// Throws NullPointerException when no coordinates have been set.
+double Individual::getY() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("Individual::getY: no coord associated to this individual."));
+  return coord_->getY();
+}
+
+/******************************************************************************/
+
+// Locality
+// Store the given locality pointer (the pointed object is not copied).
+void Individual::setLocality(const Locality<double>* locality)
+{
+  this->locality_ = locality;
+}
+
+/******************************************************************************/
+
+// Return the stored locality pointer.
+// Throws NullPointerException when no locality has been set.
+const Locality<double>* Individual::getLocality() const throw (NullPointerException)
+{
+  if (!hasLocality())
+    throw (NullPointerException("Individual::getLocality: no locality associated to this individual."));
+  return locality_;
+}
+
+/******************************************************************************/
+
+// True when the stored locality pointer is not null.
+bool Individual::hasLocality() const
+{
+  return !(locality_ == 0);
+}
+
+/******************************************************************************/
+
+// Sequences
+// Add a sequence under the key built from sequence_key.
+// The sequence container is created on first use, with the alphabet of the
+// sequence being added. Failures reported by the container are rethrown as:
+//  - AlphabetMismatchException when the alphabets differ,
+//  - BadIdentifierException when the container's message mentions "name",
+//  - Exception otherwise (reported as a key already in use).
+void Individual::addSequence(size_t sequence_key, const Sequence& sequence)
+throw (Exception)
+{
+  if (sequences_.get() == 0)
+    sequences_.reset(new MapSequenceContainer(sequence.getAlphabet()));
+  try
+  {
+    sequences_->addSequence(TextTools::toString(sequence_key), sequence);
+  }
+  catch (AlphabetMismatchException& mismatch)
+  {
+    throw (AlphabetMismatchException("Individual::addSequence: alphabets don't match.", mismatch.getAlphabets()[0], mismatch.getAlphabets()[1]));
+  }
+  catch (Exception& failure)
+  {
+    // The two remaining causes are told apart by the text of the message.
+    const string reason(failure.what());
+    if (reason.find("name") == string::npos)
+      throw (Exception("Individual::addSequence: sequence_key already in use:" + TextTools::toString(sequence_key)));
+    throw (BadIdentifierException("Individual::addSequence: sequence's name already in use.", sequence.getName()));
+  }
+}
+
+/******************************************************************************/
+
+// Return the sequence called sequence_name.
+// Throws NullPointerException when there is no sequence container and
+// SequenceNotFoundException when the name is unknown to the container.
+const Sequence& Individual::getSequenceByName(const std::string& sequence_name)
+const throw (Exception)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::getSequenceByName: no sequence data.");
+  try
+  {
+    return sequences_->getSequence(sequence_name);
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    // Rethrow with a message naming this method.
+    throw SequenceNotFoundException("Individual::getSequenceByName: sequence_name not found.", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// Return the sequence stored under the key built from sequence_position.
+// Throws NullPointerException when there is no sequence container and
+// SequenceNotFoundException when no sequence is stored under that key.
+const Sequence& Individual::getSequenceAtPosition(size_t sequence_position)
+const throw (Exception)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::getSequenceAtPosition: no sequence data.");
+  try
+  {
+    return sequences_->getSequenceByKey(TextTools::toString(sequence_position));
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    // Rethrow with a message naming this method.
+    throw SequenceNotFoundException("Individual::getSequenceAtPosition: sequence_position not found", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// Remove the sequence called sequence_name from the container.
+// Throws NullPointerException when there is no sequence container and
+// SequenceNotFoundException when the name is unknown to the container.
+void Individual::deleteSequenceByName(const std::string& sequence_name) throw (Exception)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::deleteSequenceByName: no sequence data.");
+  try
+  {
+    sequences_->deleteSequence(sequence_name);
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    // Rethrow with a message naming this method.
+    throw SequenceNotFoundException("Individual::deleteSequenceByName: sequence_name not found.", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// Remove the sequence stored under the key built from sequence_position.
+// Throws NullPointerException when there is no sequence container and
+// SequenceNotFoundException when no sequence is stored under that key.
+void Individual::deleteSequenceAtPosition(size_t sequence_position) throw (Exception)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::deleteSequenceAtPosition: no sequence data.");
+  try
+  {
+    sequences_->deleteSequenceByKey(TextTools::toString(sequence_position));
+  }
+  catch (SequenceNotFoundException& notFound)
+  {
+    // Rethrow with a message naming this method.
+    throw SequenceNotFoundException("Individual::deleteSequenceAtPosition: sequence_position not found.", notFound.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// Return the names of all stored sequences, as given by the container.
+// Throws NullPointerException when there is no sequence container.
+std::vector<std::string> Individual::getSequencesNames() const throw (NullPointerException)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::getSequencesNames: no sequence data.");
+  return sequences_->getSequencesNames();
+}
+
+/******************************************************************************/
+
+// Return the positions of all stored sequences, obtained by converting each
+// key of the container to an integer.
+// Throws NullPointerException when there is no sequence container.
+std::vector<size_t> Individual::getSequencesPositions() const throw (NullPointerException)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::getSequencesPositions: no sequence data.");
+  const vector<string> keys = sequences_->getKeys();
+  vector<size_t> positions;
+  for (vector<string>::const_iterator key = keys.begin(); key != keys.end(); ++key)
+  {
+    positions.push_back(static_cast<size_t>(TextTools::toInt(*key)));
+  }
+  return positions;
+}
+
+/******************************************************************************/
+
+// Return the position (numeric key) under which the sequence called
+// sequence_name is stored.
+// Throws NullPointerException when there is no sequence container and
+// SequenceNotFoundException when the name is unknown to the container.
+size_t Individual::getSequencePosition(const std::string& sequence_name) const throw (Exception)
+{
+  if (sequences_.get() == 0)
+    throw NullPointerException("Individual::getSequencePosition: no sequence data.");
+  try
+  {
+    // The lookup must be done on the container: calling this method itself
+    // here would recurse without end.
+    return (size_t) TextTools::toInt(sequences_->getKey(sequences_->getSequencePosition(sequence_name)));
+  }
+  catch (SequenceNotFoundException& snfe)
+  {
+    throw SequenceNotFoundException("Individual::getSequencePosition: sequence_name not found.", snfe.getSequenceId());
+  }
+}
+
+/******************************************************************************/
+
+// True when at least one sequence is stored.
+bool Individual::hasSequences() const
+{
+  return getNumberOfSequences() != 0;
+}
+
+/******************************************************************************/
+
+// True when one of the stored sequences has the given position.
+bool Individual::hasSequenceAtPosition(size_t position) const
+{
+  if (!hasSequences())
+    return false;
+  const vector<size_t> known = getSequencesPositions();
+  for (vector<size_t>::const_iterator it = known.begin(); it != known.end(); ++it)
+  {
+    if (*it == position)
+      return true;
+  }
+  return false;
+}
+
+/******************************************************************************/
+
+// Return the alphabet of the sequence container.
+// Throws NullPointerException when there is no sequence container.
+const Alphabet* Individual::getSequenceAlphabet() const throw (NullPointerException)
+{
+  if (!sequences_.get())
+    throw NullPointerException("Individual::getSequenceAlphabet: no sequence data.");
+  return sequences_->getAlphabet();
+}
+
+/******************************************************************************/
+
+// Number of stored sequences; 0 when there is no sequence container.
+size_t Individual::getNumberOfSequences() const
+{
+  if (sequences_.get() != 0)
+    return sequences_->getNumberOfSequences();
+  return 0;
+}
+
+/******************************************************************************/
+
+// Store a private copy of msc, replacing any previously stored container.
+void Individual::setSequences(const MapSequenceContainer& msc)
+{
+  std::auto_ptr<MapSequenceContainer> copy(new MapSequenceContainer(msc));
+  sequences_ = copy; // ownership moves from copy to sequences_
+}
+
+/******************************************************************************/
+
+// Return the sequence container.
+// Throws NullPointerException when there is no sequence container.
+const OrderedSequenceContainer& Individual::getSequences() const throw (NullPointerException)
+{
+  const MapSequenceContainer* container = sequences_.get();
+  if (container == 0)
+    throw NullPointerException("Individual::getSequences: no sequence data.");
+  return *container;
+}
+
+/******************************************************************************/
+
+// MultilocusGenotype
+
+// Store a private copy of genotype, replacing any previously stored one.
+void Individual::setGenotype(const MultilocusGenotype& genotype)
+{
+  std::auto_ptr<MultilocusGenotype> copy(new MultilocusGenotype(genotype));
+  genotype_ = copy; // ownership moves from copy to genotype_
+}
+
+/******************************************************************************/
+
+// Create a new genotype built from loci_number.
+// Throws Exception when a genotype is already present, and rethrows the
+// BadIntegerException raised by the MultilocusGenotype constructor with a
+// message naming this method.
+void Individual::initGenotype(size_t loci_number) throw (Exception)
+{
+  if (genotype_.get() != 0)
+    throw Exception("Individual::initGenotype: individual already has a genotype.");
+  try
+  {
+    genotype_.reset(new MultilocusGenotype(loci_number));
+  }
+  catch (BadIntegerException& badNumber)
+  {
+    throw BadIntegerException("Individual::initGenotype: loci_number must be > 0.", badNumber.getBadInteger());
+  }
+}
+
+/******************************************************************************/
+
+// Return the stored genotype.
+// Throws NullPointerException when the individual has no genotype.
+const MultilocusGenotype& Individual::getGenotype() const throw (NullPointerException)
+{
+  if (hasGenotype())
+    return *genotype_;
+  throw NullPointerException("Individual::getGenotype: individual has no genotype.");
+}
+
+/******************************************************************************/
+
+// Drop the stored genotype, if any.
+void Individual::deleteGenotype()
+{
+  genotype_.reset(0);
+}
+
+/******************************************************************************/
+
+// True when a genotype is currently stored.
+bool Individual::hasGenotype() const
+{
+  const MultilocusGenotype* stored = genotype_.get();
+  return stored != 0;
+}
+
+/******************************************************************************/
+
+// Set the monolocus genotype at locus_position in the stored genotype.
+// Throws NullPointerException when the individual has no genotype and
+// IndexOutOfBoundsException when locus_position is rejected by the genotype.
+void Individual::setMonolocusGenotype(size_t locus_position, const MonolocusGenotype& monogen) throw (Exception)
+{
+  if (!hasGenotype())
+    throw NullPointerException("Individual::setMonolocusGenotype: individual has no genotype.");
+  try
+  {
+    genotype_->setMonolocusGenotype(locus_position, monogen);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    // Rethrow with a message naming this method (spelling of "bounds" fixed).
+    throw IndexOutOfBoundsException("Individual::setMonolocusGenotype: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Set the monolocus genotype at locus_position from a list of allele keys.
+// Throws NullPointerException when the individual has no genotype,
+// IndexOutOfBoundsException when locus_position is rejected by the genotype,
+// and Exception for any other failure reported by the genotype (reported as
+// "no key in allele_keys").
+void Individual::setMonolocusGenotypeByAlleleKey(size_t locus_position, const std::vector<size_t> allele_keys) throw (Exception)
+{
+  if (!hasGenotype())
+    throw NullPointerException("Individual::setMonolocusGenotypeByAlleleKey: individual has no genotype.");
+  try
+  {
+    genotype_->setMonolocusGenotypeByAlleleKey(locus_position, allele_keys);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("Individual::setMonolocusGenotypeByAlleleKey: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  catch (Exception&) // by reference: avoids copying and slicing the exception
+  {
+    throw Exception("Individual::setMonolocusGenotypeByAlleleKey: no key in allele_keys.");
+  }
+}
+
+/******************************************************************************/
+
+// Set the monolocus genotype at locus_position from a list of allele ids,
+// resolved by the genotype with the help of locus_info.
+// Throws NullPointerException when the individual has no genotype,
+// IndexOutOfBoundsException when locus_position is rejected by the genotype,
+// and AlleleNotFoundException when an id is not found.
+void Individual::setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector<std::string> allele_id, const LocusInfo& locus_info) throw (Exception)
+{
+  if (genotype_.get() == 0)
+    throw NullPointerException("Individual::setMonolocusGenotypeByAlleleId: individual has no genotype.");
+  try
+  {
+    genotype_->setMonolocusGenotypeByAlleleId(locus_position, allele_id, locus_info);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    throw IndexOutOfBoundsException("Individual::setMonolocusGenotypeByAlleleId: locus_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+  catch (AlleleNotFoundException& missingAllele)
+  {
+    throw AlleleNotFoundException("Individual::setMonolocusGenotypeByAlleleId: id not found.", missingAllele.getIdentifier());
+  }
+}
+
+/******************************************************************************/
+
+// Return the monolocus genotype stored at locus_position.
+// Throws NullPointerException when the individual has no genotype and
+// IndexOutOfBoundsException when locus_position is rejected by the genotype.
+const MonolocusGenotype& Individual::getMonolocusGenotype(size_t locus_position) throw (Exception)
+{
+  if (genotype_.get() == 0)
+    throw NullPointerException("Individual::getMonolocusGenotype: individual has no genotype.");
+  try
+  {
+    return genotype_->getMonolocusGenotype(locus_position);
+  }
+  catch (IndexOutOfBoundsException& badIndex)
+  {
+    // Rethrow with a message naming this method.
+    throw IndexOutOfBoundsException("Individual::getMonolocusGenotype: locus_position out of bounds.", badIndex.getBadIndex(), badIndex.getBounds()[0], badIndex.getBounds()[1]);
+  }
+}
+
+/******************************************************************************/
+
+// Forward to the genotype's countNonMissingLoci().
+// Throws NullPointerException when the individual has no genotype.
+size_t Individual::countNonMissingLoci() const throw (NullPointerException)
+{
+  if (hasGenotype())
+    return genotype_->countNonMissingLoci();
+  throw NullPointerException("Individual::countNonMissingLoci: individual has no genotype.");
+}
+
+/******************************************************************************/
+
+// Forward to the genotype's countHomozygousLoci().
+// Throws NullPointerException when the individual has no genotype.
+size_t Individual::countHomozygousLoci() const throw (NullPointerException)
+{
+  if (hasGenotype())
+    return genotype_->countHomozygousLoci();
+  throw NullPointerException("Individual::countHomozygousLoci: individual has no genotype.");
+}
+
+/******************************************************************************/
+
+// Forward to the genotype's countHeterozygousLoci().
+// Throws NullPointerException when the individual has no genotype.
+size_t Individual::countHeterozygousLoci() const throw (NullPointerException)
+{
+  if (hasGenotype())
+    return genotype_->countHeterozygousLoci();
+  throw NullPointerException("Individual::countHeterozygousLoci: individual has no genotype.");
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/Individual.h b/src/Bpp/PopGen/Individual.h
new file mode 100644
index 0000000..bfbaf8f
--- /dev/null
+++ b/src/Bpp/PopGen/Individual.h
@@ -0,0 +1,472 @@
+//
+// File Individual.h
+// Author : Sylvain Gaillard
+// Last modification : Tuesday August 03 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _INDIVIDUAL_H_
+#define _INDIVIDUAL_H_
+
+// From STL
+#include <vector>
+#include <memory>
+
+#include <Bpp/Graphics/Point2D.h>
+#include <Bpp/Exceptions.h>
+#include <Bpp/Text/TextTools.h>
+
+// From SeqLib
+#include <Bpp/Seq/Sequence.h>
+#include <Bpp/Seq/SequenceExceptions.h>
+#include <Bpp/Seq/Container/OrderedSequenceContainer.h>
+#include <Bpp/Seq/Container/MapSequenceContainer.h>
+
+// From PopGenLib
+#include "Locality.h"
+#include "Date.h"
+#include "MultilocusGenotype.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief The Individual class.
+ *
+ * This class is designed to store data on a single individual.
+ * This individual has only one sequence for each locus ... no information
+ * about diploid sequence data.
+ * See the documentation of the (no longer used) MultiSeqIndividual class for an alternative.
+ *
+ * @author Sylvain Gaillard
+ */
+class Individual
+{
+protected:
+ std::string id_;
+ unsigned short sex_;
+ std::auto_ptr<Date> date_;
+ std::auto_ptr< Point2D<double> > coord_;
+ const Locality<double>* locality_;
+ std::auto_ptr<MapSequenceContainer> sequences_;
+ std::auto_ptr<MultilocusGenotype> genotype_;
+
+public:
+ // Constructors and destructor :
+ /**
+ * @brief Build a void new Individual.
+ */
+ Individual();
+
+ /**
+ * @brief Build a new Individual with an identifier.
+ */
+ Individual(const std::string& id);
+
+ /**
+ * @brief Build a new Individual with parameters.
+ *
+ * @param id The id of the Individual as a string.
+ * @param date The date of the Individual as a Date object.
+ * @param coord The coordinates of the Individual as a Point2D object.
+ * @param locality The locality of the Individual as a pointer to a Locality
+ * object.
+ * @param sex The sex of the Individual as an unsigned short.
+ */
+ Individual(const std::string& id,
+ const Date& date,
+ const Point2D<double>& coord,
+ Locality<double>* locality,
+ const unsigned short sex);
+
+ /**
+ * @brief The Individual copy constructor.
+ */
+ Individual(const Individual& ind);
+
+ /**
+ * @brief Destroy an Individual.
+ */
+ virtual ~Individual();
+
+public:
+ // Methods
+ /**
+ * @brief The Individual copy operator.
+ *
+ * @return A ref toward the assigned Individual.
+ * Make a copy of each attribute of the Individual.
+ */
+ Individual& operator=(const Individual& ind);
+
+ /**
+ * @brief Set the id of the Individual.
+ *
+ * @param id The id of the Individual as a string.
+ */
+ void setId(const std::string& id);
+
+ /**
+ * @brief Get the id of the Individual.
+ *
+ * @return The id of the Individual as a string.
+ */
+ const std::string& getId() const { return id_; }
+
+ /**
+ * @brief Set the sex of the Individual.
+ *
+ * @param sex An unsigned short coding for the sex.
+ */
+ void setSex(const unsigned short sex);
+
+ /**
+ * @brief Get the sex of the Individual.
+ *
+ * @return The sex of the Individual as an unsigned short.
+ */
+ unsigned short getSex() const { return sex_; }
+
+ /**
+ * @brief Set the date of the Individual.
+ *
+ * @param date The date as a Date object.
+ */
+ void setDate(const Date& date);
+
+ /**
+ * @brief Get the date of the Individual.
+ *
+ * @return A reference to a Date object if the Individual has a date.
+ * Otherwise throw a NullPointerException.
+ */
+ const Date& getDate() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this Individual has a date.
+ */
+ bool hasDate() const;
+
+ /**
+ * @brief Set the coordinates of the Individual.
+ *
+ * @param coord A Point2D object.
+ */
+ void setCoord(const Point2D<double>& coord);
+
+ /**
+ * @brief Set the coordinates of the Individual.
+ *
+ * @param x The X coordinate as a double.
+ * @param y The Y coordinate as a double.
+ */
+ void setCoord(const double x, const double y);
+
+ /**
+ * @brief Get the coordinates of the Individual.
+ *
+ * @return A reference to a Point2D object if the Individual has
+ * coordinates. Otherwise throw a NullPointerException.
+ */
+ const Point2D<double>& getCoord() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this Individual has coordinates.
+ */
+ bool hasCoord() const;
+
+ /**
+ * @brief Set the X coordinate of the Individual.
+ *
+ * @param x The X coordinate as a double.
+ *
+ * Set the X coordinate if the Individual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ void setX(const double x) throw (NullPointerException);
+
+ /**
+ * @brief Set the Y coordinate of the Individual.
+ *
+ * @param y The Y coordinate as a double.
+ *
+ * Set the Y coordinate if the Individual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ void setY(const double y) throw (NullPointerException);
+
+ /**
+ * @brief Get the X coordinate of the Individual.
+ *
+ * @return The X coordinate as a double if the Individual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ double getX() const throw (NullPointerException);
+
+ /**
+ * @brief Get the Y coordinate of the Individual.
+ *
+ * @return The Y coordinate as a double if the Individual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ double getY() const throw (NullPointerException);
+
+ /**
+ * @brief Set the locality of the Individual.
+ *
+ * @param locality A pointer to a Locality object.
+ */
+ void setLocality(const Locality<double>* locality);
+
+ /**
+ * @brief Get the locality of the Individual.
+ *
+ * @return A pointer to the Locality of the Individual.
+ */
+ const Locality<double>* getLocality() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this Individual has a locality.
+ */
+ bool hasLocality() const;
+
+ /**
+ * @brief Add a sequence to the Individual.
+ *
+ * Creates the sequence container when adding the first sequence.
+ * Otherwise add the sequence to the end of the sequence container.
+ *
+ * @param sequence_key the place where the sequence will be put.
+ * @param sequence The sequence to add.
+ * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
+ * @throw BadIdentifierException if sequence's name is already in use.
+ * @throw BadIntegerException if sequence_key is already in use.
+ */
+ void addSequence(size_t sequence_key, const Sequence& sequence)
+ throw (Exception);
+
+ /**
+ * @brief Get a sequence by its name.
+ *
+ * @param sequence_name The name of the sequence.
+ * @return A reference to the sequence.
+ * @throw NullPointerException if there is no sequence container defined.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ const Sequence& getSequenceByName(const std::string& sequence_name)
+ const throw (Exception);
+
+ /**
+ * @brief Get a sequence by its position.
+ *
+ * @param sequence_position The position of the sequence in the sequence set.
+ * @return A reference to the sequence.
+ * @throw NullPointerException if there is no sequence container defined.
+ * @throw SequenceNotFoundException if sequence_position is not found (i.e. missing data or not used).
+ */
+ const Sequence& getSequenceAtPosition(const size_t sequence_position)
+ const throw (Exception);
+
+ /**
+ * @brief Delete a sequence.
+ *
+ * @param sequence_name The name of the sequence.
+ * @throw NullPointerException if there is no sequence container defined.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ void deleteSequenceByName(const std::string& sequence_name) throw (Exception);
+
+ /**
+ * @brief Delete a sequence.
+ *
+ * @param sequence_position The position of the sequence.
+ * @throw NullPointerException if there is no sequence container defined.
+ * @throw SequenceNotFoundException if sequence_position is not found.
+ */
+ void deleteSequenceAtPosition(size_t sequence_position) throw (Exception);
+
+ /**
+ * @brief Tell if the Individual has some sequences.
+ *
+ * @return TRUE if the individual has at least one sequence.
+ * @return FALSE if the container is empty or undefined.
+ */
+ bool hasSequences() const;
+
+ /**
+ * @brief Tell if the Individual has a sequence at a given position.
+ */
+ bool hasSequenceAtPosition(size_t position) const;
+
+ /**
+ * @brief Return the alphabet of the sequences.
+ *
+ * @throw NullPointerException if there is no sequence container defined.
+ */
+ const Alphabet* getSequenceAlphabet() const throw (NullPointerException);
+
+ /**
+ * @brief Get the sequences' names.
+ *
+ * @return All the sequences' names of the individual in a vector of string.
+ * @throw NullPointerException if there is no sequence container defined.
+ */
+ std::vector<std::string> getSequencesNames() const throw (NullPointerException);
+
+ /**
+ * @brief Get the sequences' positions.
+ *
+ * @return All the positions where a sequence is found.
+ * @throw NullPointerException if there is no sequence container defined.
+ */
+ std::vector<size_t> getSequencesPositions() const throw (NullPointerException);
+
+ /**
+ * @brief Get the position of a sequence.
+ *
+ * @throw NullPointerException if there is no sequence container defined.
+ * @throw SequenceNotFoundException if sequence_name is not found.
+ */
+ size_t getSequencePosition(const std::string& sequence_name) const throw (Exception);
+
+ /**
+ * @brief Get the number of sequences.
+ */
+ size_t getNumberOfSequences() const;
+
+ /**
+ * @brief Set all the sequences with a MapSequenceContainer.
+ */
+ void setSequences(const MapSequenceContainer& msc);
+
+ /**
+ * @brief Get a reference to the sequence container.
+ *
+ * @throw NullPointerException if there is no sequence container defined.
+ */
+ const OrderedSequenceContainer& getSequences() const throw (NullPointerException);
+
+ /**
+ * @brief Set a genotype.
+ *
+ * @param genotype The MultilocusGenotype which will be copied.
+ */
+ void setGenotype(const MultilocusGenotype& genotype);
+
+ /**
+ * @brief Init the genotype.
+ *
+ * @throw Exception if the Individual already has a Genotype.
+ * @throw BadIntegerException if loci_number < 1.
+ */
+ void initGenotype(size_t loci_number) throw (Exception);
+
+ /**
+ * @brief Get the genotype.
+ */
+ const MultilocusGenotype& getGenotype() const throw (NullPointerException);
+
+ /**
+ * @brief Delete the genotype of the individual.
+ */
+ void deleteGenotype();
+
+ /**
+ * @brief Tell if the Individual has a MultilocusGenotype.
+ */
+ bool hasGenotype() const;
+
+ /**
+ * @brief Set a MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ */
+ void setMonolocusGenotype(size_t locus_position, const MonolocusGenotype& monogen)
+ throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ * @throw Exception if there is no key in allele_keys.
+ */
+ void setMonolocusGenotypeByAlleleKey(size_t locus_position, const std::vector<size_t> allele_keys)
+ throw (Exception);
+
+ /**
+ * @brief Set a MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ * @throw AlleleNotFoundException if at least one of the ids is not found in the LocusInfo.
+ */
+ void setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector<std::string> allele_id, const LocusInfo& locus_info)
+ throw (Exception);
+
+ /**
+ * @brief Get a MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+ */
+ const MonolocusGenotype& getMonolocusGenotype(size_t locus_position) throw (Exception);
+
+ /**
+ * @brief Count the number of non missing MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ */
+ size_t countNonMissingLoci() const throw (NullPointerException);
+
+ /**
+ * @brief Count the number of homozygous MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ */
+ size_t countHomozygousLoci() const throw (NullPointerException);
+
+ /**
+ * @brief Count the number of heterozygous MonolocusGenotype.
+ *
+ * @throw NullPointerException if there is no genotype defined.
+ */
+ size_t countHeterozygousLoci() const throw (NullPointerException);
+};
+} // end of namespace bpp;
+
+#endif // _INDIVIDUAL_H_
+
diff --git a/src/Bpp/PopGen/Locality.h b/src/Bpp/PopGen/Locality.h
new file mode 100644
index 0000000..50cda1d
--- /dev/null
+++ b/src/Bpp/PopGen/Locality.h
@@ -0,0 +1,130 @@
+//
+// File Locality.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _LOCALITY_H_
+#define _LOCALITY_H_
+
+// From std lib
+#include <string>
+
+#include <Bpp/Graphics/Point2D.h>
+
+namespace bpp
+{
+/**
+ * @brief The Locality class.
+ *
+ * This class is derived from the Point2D class: it is a Point2D
+ * with a name attached.
+ *
+ * @author Sylvain Gaillard
+ */
+template<class T> class Locality :
+  public bpp::Point2D<T>
+{
+protected:
+  std::string name_;
+
+public:
+  // Constructors and destructor.
+  // They are declared with the injected class name (Locality, not
+  // Locality<T>): a template-id is not a valid constructor/destructor
+  // name since C++20.
+  /**
+   * @brief Build a new locality with name and coordinates.
+   *
+   * @param name The name of the locality.
+   * @param x The longitude.
+   * @param y The latitude.
+   */
+  Locality(const std::string& name, const T x = 0, const T y = 0) :
+    bpp::Point2D<T>(x, y),
+    name_(name) {}
+
+  /**
+   * @brief Build a new locality with name and coordinates.
+   *
+   * @param name The name of the locality.
+   * @param coord The coordinates of the locality.
+   */
+  Locality(const std::string& name, const bpp::Point2D<T>& coord) :
+    bpp::Point2D<T>(coord),
+    name_(name) {}
+
+  /**
+   * @brief Destroy a locality.
+   */
+  virtual ~Locality() {}
+
+public:
+  // Methods
+  /**
+   * @brief Implements the Clonable interface.
+   *
+   * @return A newly allocated copy of this locality.
+   */
+  Locality<T>* clone() const { return new Locality<T>(*this); }
+
+  /**
+   * @brief The == operator.
+   *
+   * Returns true if both name and coordinates are identical between the
+   * two Locality objects. Coordinates are compared with the == operator
+   * of T, i.e. exactly, without any tolerance.
+   */
+  virtual bool operator==(const Locality<T>& locality) const
+  {
+    return this->getX() == locality.getX() && this->getY() == locality.getY() && name_ == locality.name_;
+  }
+
+  /**
+   * @brief The != operator.
+   *
+   * Defined as the negation of operator==.
+   */
+  virtual bool operator!=(const Locality<T>& locality) const
+  {
+    return !(locality == *this);
+  }
+
+  /**
+   * @brief Set the name of the locality.
+   *
+   * @param name The new name.
+   */
+  void setName(const std::string& name) { name_ = name; }
+
+  /**
+   * @brief Get the name of the locality.
+   *
+   * @return A constant reference to the name.
+   */
+  const std::string& getName() const { return name_; }
+};
+} // end of namespace bpp;
+
+#endif // _LOCALITY_H_
+
diff --git a/src/Bpp/PopGen/LocusInfo.cpp b/src/Bpp/PopGen/LocusInfo.cpp
new file mode 100644
index 0000000..0d4a57b
--- /dev/null
+++ b/src/Bpp/PopGen/LocusInfo.cpp
@@ -0,0 +1,135 @@
+//
+// File LocusInfo.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include <Bpp/Text/TextTools.h>
+
+#include "LocusInfo.h"
+#include "GeneralExceptions.h"
+
+using namespace bpp;
+using namespace std;
+
+// Definitions of the ploidy codes declared as public static members
+// of LocusInfo (see LocusInfo.h).
+unsigned int LocusInfo::HAPLODIPLOID = 0;
+unsigned int LocusInfo::HAPLOID      = 1;
+unsigned int LocusInfo::DIPLOID      = 2;
+unsigned int LocusInfo::UNKNOWN      = 9999;
+
+// ** Class constructor: *******************************************************/
+
+// Build a locus with the given name and ploidy and no allele.
+LocusInfo::LocusInfo(const std::string& name, const unsigned int ploidy) :
+  name_(name),
+  ploidy_(ploidy),
+  alleles_()
+{}
+
+// Copy constructor: performs a deep copy, every AlleleInfo of the source
+// locus is cloned so that the two objects never share a pointer.
+LocusInfo::LocusInfo(const LocusInfo& locus_info) :
+  name_(locus_info.getName()),
+  ploidy_(locus_info.getPloidy()),
+  alleles_(locus_info.getNumberOfAlleles()) // slots start as null pointers
+{
+  try
+  {
+    for (size_t i = 0; i < alleles_.size(); ++i)
+    {
+      // clone() already yields an AlleleInfo pointer (addAlleleInfo stores
+      // its result directly), so no cast is required.
+      alleles_[i] = locus_info.getAlleleInfoByKey(i).clone();
+    }
+  }
+  catch (...)
+  {
+    // The destructor is not run for a partially constructed object:
+    // release the clones made so far (deleting a null slot is harmless).
+    for (size_t i = 0; i < alleles_.size(); ++i)
+    {
+      delete alleles_[i];
+    }
+    throw;
+  }
+}
+
+// ** Class destructor: *******************************************************/
+
+// Release every owned AlleleInfo. clear() performs exactly the
+// delete-all-then-empty sequence needed here.
+LocusInfo::~LocusInfo()
+{
+  clear();
+}
+
+// ** Other methodes: *********************************************************/
+
+// AlleleInfos
+// Append a clone of 'allele' to the locus.
+// Throws BadIdentifierException if an allele with the same id is already stored.
+void LocusInfo::addAlleleInfo(const AlleleInfo& allele) throw (BadIdentifierException)
+{
+  // Reject the allele if its id is already in use.
+  for (vector<AlleleInfo*>::const_iterator it = alleles_.begin(); it != alleles_.end(); ++it)
+  {
+    if ((*it)->getId() == allele.getId())
+    {
+      throw BadIdentifierException("LocusInfo::addAlleleInfo: Id already in use.", allele.getId());
+    }
+  }
+
+  // The locus keeps its own copy of the allele.
+  alleles_.push_back(allele.clone());
+}
+
+// Return the first stored AlleleInfo whose id equals 'id'.
+// Throws AlleleNotFoundException when no allele matches.
+const AlleleInfo& LocusInfo::getAlleleInfoById(const std::string& id) const throw (AlleleNotFoundException)
+{
+  vector<AlleleInfo*>::const_iterator it = alleles_.begin();
+  while (it != alleles_.end())
+  {
+    if ((*it)->getId() == id)
+    {
+      return **it;
+    }
+    ++it;
+  }
+  throw AlleleNotFoundException("LocusInfo::getAlleleInfoById: AlleleInfo id unknown.", id);
+}
+
+// Return the AlleleInfo stored at position 'key'.
+// Throws IndexOutOfBoundsException when 'key' is not a valid position.
+const AlleleInfo& LocusInfo::getAlleleInfoByKey(size_t key) const throw (IndexOutOfBoundsException)
+{
+  if (key < alleles_.size())
+  {
+    return *alleles_[key];
+  }
+  throw IndexOutOfBoundsException("LocusInfo::getAlleleInfoByKey: key out of bounds.", key, 0, alleles_.size());
+}
+
+// Return the position of the first stored AlleleInfo whose id equals 'id'.
+// Throws AlleleNotFoundException when no allele matches.
+unsigned int LocusInfo::getAlleleInfoKey(const std::string& id) const
+throw (AlleleNotFoundException)
+{
+  unsigned int position = 0;
+  while (position < alleles_.size())
+  {
+    if (alleles_[position]->getId() == id)
+    {
+      return position;
+    }
+    position++;
+  }
+  throw AlleleNotFoundException("LocusInfo::getAlleleInfoKey: AlleleInfo id not found.", id);
+}
+
+// Number of AlleleInfo objects currently stored in this locus.
+size_t LocusInfo::getNumberOfAlleles() const
+{
+  const size_t alleleCount = alleles_.size();
+  return alleleCount;
+}
+
+// Delete every owned AlleleInfo (front to back), then empty the container.
+void LocusInfo::clear()
+{
+  for (vector<AlleleInfo*>::iterator it = alleles_.begin(); it != alleles_.end(); ++it)
+  {
+    delete *it;
+  }
+  alleles_.clear();
+}
+
diff --git a/src/Bpp/PopGen/LocusInfo.h b/src/Bpp/PopGen/LocusInfo.h
new file mode 100644
index 0000000..bf49257
--- /dev/null
+++ b/src/Bpp/PopGen/LocusInfo.h
@@ -0,0 +1,155 @@
+//
+// File LocusInfo.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _LOCUSINFO_H_
+#define _LOCUSINFO_H_
+
+// From STL
+#include <string>
+#include <vector>
+
+// From local Popgenlib
+#include "AlleleInfo.h"
+#include "GeneralExceptions.h"
+
+#include <Bpp/Exceptions.h>
+
+namespace bpp
+{
+/**
+ * @brief The LocusInfo class.
+ *
+ * This is an AlleleInfo container with additional data: a name and
+ * the ploidy of the locus.
+ *
+ * The LocusInfo owns its AlleleInfo objects: they are cloned when added
+ * or copied, and deleted by clear() and by the destructor.
+ *
+ * @author Sylvain Gaillard
+ */
+class LocusInfo
+{
+private:
+  std::string name_;
+  unsigned int ploidy_;
+  std::vector<AlleleInfo*> alleles_; // owned pointers
+
+public:
+  // Ploidy codes; their values are defined in LocusInfo.cpp.
+  // NOTE(review): these are mutable statics although they look like
+  // constants -- confirm nobody assigns to them before making them const.
+  static unsigned int HAPLODIPLOID;
+  static unsigned int HAPLOID;
+  static unsigned int DIPLOID;
+  static unsigned int UNKNOWN;
+
+public:
+  // Constructors and destructor
+  /**
+   * @brief Build a new LocusInfo object.
+   *
+   * @param name The name of the locus.
+   * @param ploidy The ploidy of the locus.
+   */
+  LocusInfo(const std::string& name, const unsigned int ploidy = DIPLOID);
+
+  /**
+   * @brief Copy constructor.
+   *
+   * Every AlleleInfo of the source is cloned.
+   */
+  LocusInfo(const LocusInfo& locus_info);
+
+  /**
+   * @brief Assignment operator.
+   *
+   * The compiler-generated assignment would copy the raw AlleleInfo
+   * pointers, so that two objects would delete the same alleles and the
+   * previous alleles of the target would leak. This version makes a deep
+   * copy first, then swaps it in: if the copy throws, this object is left
+   * untouched. Self-assignment is safe.
+   */
+  LocusInfo& operator=(const LocusInfo& locus_info)
+  {
+    LocusInfo tmp(locus_info);   // deep copy, may throw
+    name_.swap(tmp.name_);
+    ploidy_ = tmp.ploidy_;
+    alleles_.swap(tmp.alleles_); // tmp's destructor deletes the old alleles
+    return *this;
+  }
+
+  /**
+   * @brief Destroy the LocusInfo.
+   */
+  virtual ~LocusInfo();
+
+public:
+  // Methods
+  /**
+   * @brief Get the name of the locus.
+   */
+  const std::string& getName() const { return name_; }
+
+  /**
+   * @brief Get the ploidy of the locus.
+   *
+   * @return The ploidy as an unsigned integer.
+   */
+  unsigned int getPloidy() const { return ploidy_; }
+
+  /**
+   * @brief Add an AlleleInfo to the LocusInfo.
+   *
+   * The allele is cloned; the caller keeps ownership of its own object.
+   *
+   * @throw BadIdentifierException if the AlleleInfo's id already exists.
+   */
+  void addAlleleInfo(const AlleleInfo& allele)
+  throw (BadIdentifierException);
+
+  /**
+   * @brief Retrieve an AlleleInfo object of the LocusInfo by its id.
+   *
+   * @throw AlleleNotFoundException if the id is not found.
+   */
+  const AlleleInfo& getAlleleInfoById(const std::string& id) const
+  throw (AlleleNotFoundException);
+
+  /**
+   * @brief Retrieve an AlleleInfo object of the LocusInfo by its position.
+   *
+   * @throw IndexOutOfBoundsException if key exceeds the number of alleles.
+   */
+  const AlleleInfo& getAlleleInfoByKey(size_t key) const
+  throw (IndexOutOfBoundsException);
+
+  /**
+   * @brief Get the position of an AlleleInfo.
+   *
+   * @throw AlleleNotFoundException if the AlleleInfo's id is not found.
+   */
+  unsigned int getAlleleInfoKey(const std::string& id) const
+  throw (AlleleNotFoundException);
+
+  /**
+   * @brief Get the number of alleles at this locus.
+   */
+  size_t getNumberOfAlleles() const;
+
+  /**
+   * @brief Delete all alleles from the locus.
+   */
+  void clear();
+};
+} // end of namespace bpp;
+
+#endif // _LOCUSINFO_H_
+
diff --git a/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp b/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp
new file mode 100644
index 0000000..b89aa8b
--- /dev/null
+++ b/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp
@@ -0,0 +1,86 @@
+//
+// File MonoAlleleMonolocusGenotype.cpp
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "MonoAlleleMonolocusGenotype.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Build a genotype holding the single allele index given.
+MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(size_t allele_index) :
+  allele_index_(allele_index)
+{}
+
+// Build a genotype from a vector that must hold exactly one allele index.
+// Throws BadSizeException (reporting the actual size and the expected
+// size 1) for any other vector size.
+MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(std::vector<size_t> allele_index) throw (BadSizeException) :
+  allele_index_(0)
+{
+  if (allele_index.size() != 1)
+    throw BadSizeException("MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype: allele_index must contain one value.", allele_index.size(), 1);
+  allele_index_ = allele_index[0];
+}
+
+// Copy constructor. The index is read straight from the source object
+// instead of going through getAlleleIndex(), which would allocate a
+// temporary vector only to extract its single element.
+MonoAlleleMonolocusGenotype::MonoAlleleMonolocusGenotype(const MonoAlleleMonolocusGenotype& mmg) :
+  allele_index_(mmg.allele_index_)
+{}
+
+// ** Class destructor: ********************************************************/
+
+// Nothing to release: the class only stores a plain index.
+MonoAlleleMonolocusGenotype::~MonoAlleleMonolocusGenotype()
+{}
+
+// ** Other methodes: **********************************************************/
+
+// Assignment operator: copy the allele index of 'mmg'.
+// The member is read directly rather than through getAlleleIndex(), which
+// would build a temporary vector just to read one value.
+MonoAlleleMonolocusGenotype& MonoAlleleMonolocusGenotype::operator=(const MonoAlleleMonolocusGenotype& mmg)
+{
+  allele_index_ = mmg.allele_index_;
+  return *this;
+}
+
+// Two genotypes are equal when they hold the same allele index.
+// The member is compared directly rather than through getAlleleIndex(),
+// which would allocate a temporary vector on every comparison.
+bool MonoAlleleMonolocusGenotype::operator==(const MonoAlleleMonolocusGenotype& mmg) const
+{
+  return allele_index_ == mmg.allele_index_;
+}
+
+// MonolocusGenotype interface: return the allele index wrapped in a
+// one-element vector.
+std::vector<size_t> MonoAlleleMonolocusGenotype::getAlleleIndex() const
+{
+  return vector<size_t>(1, allele_index_);
+}
+
+// Clonable interface: return a heap-allocated copy of this genotype.
+MonoAlleleMonolocusGenotype* MonoAlleleMonolocusGenotype::clone() const
+{
+  MonoAlleleMonolocusGenotype* copy = new MonoAlleleMonolocusGenotype(*this);
+  return copy;
+}
+
diff --git a/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.h b/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.h
new file mode 100644
index 0000000..21cec75
--- /dev/null
+++ b/src/Bpp/PopGen/MonoAlleleMonolocusGenotype.h
@@ -0,0 +1,113 @@
+//
+// File MonoAlleleMonolocusGenotype.h
+// Author : Sylvain Gaillard <yragael2001 at yahoo.fr>
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _MONOALLELEMONOLOCUSGENOTYPE_H_
+#define _MONOALLELEMONOLOCUSGENOTYPE_H_
+
+#include <Bpp/Exceptions.h>
+
+// From local
+#include "MonolocusGenotype.h"
+
+namespace bpp
+{
+/**
+ * @brief The MonoAlleleMonolocusGenotype class.
+ *
+ * A MonolocusGenotype that stores a single allele index.
+ *
+ * @author Sylvain Gaillard
+ */
+class MonoAlleleMonolocusGenotype :
+ public MonolocusGenotype
+{
+private:
+ size_t allele_index_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a monolocus genotype containing one allele.
+ *
+ * @param allele_index The index of the allele.
+ */
+ MonoAlleleMonolocusGenotype(size_t allele_index);
+
+ /**
+ * @brief Build a monolocus genotype containing one allele.
+ *
+ * @param allele_index A vector holding the index of the allele.
+ * @throw BadSizeException if allele_index does not contain exactly one value.
+ */
+ MonoAlleleMonolocusGenotype(std::vector<size_t> allele_index) throw (BadSizeException);
+
+ /**
+ * @brief Copy constructor.
+ */
+ MonoAlleleMonolocusGenotype(const MonoAlleleMonolocusGenotype& mmg);
+
+ /**
+ * @brief Destroy the MonoAlleleMonolocusGenotype.
+ */
+ ~MonoAlleleMonolocusGenotype();
+
+public:
+ // Other methods
+ /**
+ * @brief The assignment operator.
+ */
+ MonoAlleleMonolocusGenotype& operator=(const MonoAlleleMonolocusGenotype& mmg);
+
+ /**
+ * @brief The == operator.
+ *
+ * @return true if both genotypes hold the same allele index.
+ */
+ virtual bool operator==(const MonoAlleleMonolocusGenotype& mmg) const;
+
+ /**
+ * @name The MonolocusGenotype interface:
+ *
+ * getAlleleIndex() returns a vector containing the single allele index.
+ *
+ * @{
+ */
+ std::vector<size_t> getAlleleIndex() const;
+ /** @} */
+
+ /**
+ * @name The Clonable interface:
+ *
+ * clone() returns a newly allocated copy of this genotype.
+ *
+ * @{
+ */
+ MonoAlleleMonolocusGenotype* clone() const;
+ /** @} */
+};
+} // end of namespace bpp;
+
+#endif // _MONOALLELEMONOLOCUSGENOTYPE_H_
diff --git a/src/Bpp/PopGen/MonolocusGenotype.h b/src/Bpp/PopGen/MonolocusGenotype.h
new file mode 100644
index 0000000..11bc8b6
--- /dev/null
+++ b/src/Bpp/PopGen/MonolocusGenotype.h
@@ -0,0 +1,86 @@
+//
+// File MonolocusGenotype.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _MONOLOCUSGENOTYPE_H_
+#define _MONOLOCUSGENOTYPE_H_
+
+// From STL
+#include <vector>
+
+#include <Bpp/Clonable.h>
+
+namespace bpp
+{
+/**
+ * @brief The MonolocusGenotype virtual class.
+ *
+ * A MonolocusGenotype contains the alleles' keys defined in a Locus object.
+ * These keys are returned as size_t values.
+ * This class is an interface for all monolocus genotypes.
+ *
+ * @author Sylvain Gaillard
+ */
+class MonolocusGenotype :
+ public Clonable
+{
+public:
+ // Constructors and Destructor
+ /**
+ * @brief Destroy a MonolocusGenotype.
+ */
+ virtual ~MonolocusGenotype() {}
+
+public:
+ // Methods
+ /**
+ * @brief Get the alleles' index.
+ *
+ * The alleles' indexes are the positions of the AlleleInfo in a LocusInfo object.
+ * If no LocusInfo is used, the indexes are just numbers to identify the alleles.
+ *
+ * @return A vector of size_t.
+ *
+ * The size of the vector corresponds to the number of alleles at this locus.
+ */
+ virtual std::vector<size_t> getAlleleIndex() const = 0;
+};
+} // end of namespace bpp;
+
+#endif // _MONOLOCUSGENOTYPE_H_
+
diff --git a/src/Bpp/PopGen/MonolocusGenotypeTools.cpp b/src/Bpp/PopGen/MonolocusGenotypeTools.cpp
new file mode 100644
index 0000000..3ee0b2e
--- /dev/null
+++ b/src/Bpp/PopGen/MonolocusGenotypeTools.cpp
@@ -0,0 +1,61 @@
+//
+// File MonolocusGenotypeTools.cpp
+// Author: Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+// Created on: April 4, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 4, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+// From Pop
+#include "MonoAlleleMonolocusGenotype.h"
+#include "BiAlleleMonolocusGenotype.h"
+#include "MultiAlleleMonolocusGenotype.h"
+
+#include "MonolocusGenotypeTools.h"
+
+using namespace bpp;
+using namespace std;
+
+// Build the MonolocusGenotype implementation matching the number of keys:
+// one key -> MonoAlleleMonolocusGenotype, two keys -> BiAlleleMonolocusGenotype,
+// more than two keys -> MultiAlleleMonolocusGenotype.
+// Throws Exception when allele_keys is empty.
+std::auto_ptr<MonolocusGenotype> MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey(const std::vector<size_t> allele_keys) throw (Exception)
+{
+  const size_t keyCount = allele_keys.size();
+  if (keyCount < 1)
+    throw Exception("MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey: no key in allele_keys.");
+
+  MonolocusGenotype* genotype = 0;
+  switch (keyCount)
+  {
+  case 1:
+    genotype = new MonoAlleleMonolocusGenotype(allele_keys);
+    break;
+  case 2:
+    genotype = new BiAlleleMonolocusGenotype(allele_keys);
+    break;
+  default:
+    // all other cases (keyCount > 2)
+    genotype = new MultiAlleleMonolocusGenotype(allele_keys);
+    break;
+  }
+  return auto_ptr<MonolocusGenotype>(genotype);
+}
diff --git a/src/Bpp/PopGen/MonolocusGenotypeTools.h b/src/Bpp/PopGen/MonolocusGenotypeTools.h
new file mode 100644
index 0000000..6e38fb4
--- /dev/null
+++ b/src/Bpp/PopGen/MonolocusGenotypeTools.h
@@ -0,0 +1,80 @@
+//
+// File MonolocusGenotypeTools.h
+// Author : Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+// Last modification : April 4, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (April 4, 2008)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+// Secured inclusion of header's file
+#ifndef _MonolocusGenotypeTools_h_
+#define _MonolocusGenotypeTools_h_
+
+// From STL
+#include <vector>
+#include <memory>
+
+#include <Bpp/Exceptions.h>
+
+// From Pop
+#include "MonolocusGenotype.h"
+
+namespace bpp
+{
+/**
+ * @brief The MonolocusGenotypeTools static class.
+ *
+ * This class provides tools for MonolocusGenotype manipulation or creation.
+ *
+ * @author Sylvain Gaillard
+ */
+class MonolocusGenotypeTools
+{
+public:
+ /**
+ * @brief Build a proper MonolocusGenotype according to the number of alleles.
+ *
+ * Return a MonolocusGenotype built according to the number of alleles.
+ * If one allele key, return a MonoAlleleMonolocusGenotype,
+ * if two allele keys, return a BiAlleleMonolocusGenotype,
+ * if more allele keys, return a MultiAlleleMonolocusGenotype.
+ *
+ * @param allele_keys A vector containing the allele keys to put in the MonolocusGenotype.
+ * @return A MonolocusGenotype according to the number of alleles, owned by the returned auto_ptr.
+ * @throw Exception if allele_keys is empty.
+ */
+ static std::auto_ptr<MonolocusGenotype> buildMonolocusGenotypeByAlleleKey(const std::vector<size_t> allele_keys) throw (Exception);
+};
+} // end of namespace bpp;
+
+#endif // _MonolocusGenotypeTools_h_
+
diff --git a/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp b/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp
new file mode 100644
index 0000000..59ab421
--- /dev/null
+++ b/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp
@@ -0,0 +1,106 @@
+//
+// File MultiAlleleMonolocusGenotype.cpp
+// Author : Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+// Last modification : Wednesday March 5 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "MultiAlleleMonolocusGenotype.h"
+
+// From STL
+#include <algorithm>
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Build the genotype from a list of allele indexes; the indexes are copied
+// in the order given.
+MultiAlleleMonolocusGenotype::MultiAlleleMonolocusGenotype(std::vector<size_t> allele_index) : allele_index_(allele_index.begin(), allele_index.end())
+{}
+
+// Copy constructor.
+// The member vector is copied directly: getAlleleIndex() returns by value, so
+// calling it in the loop condition and body duplicated the whole vector on
+// every iteration.
+MultiAlleleMonolocusGenotype::MultiAlleleMonolocusGenotype(const MultiAlleleMonolocusGenotype& mmg) : allele_index_(mmg.allele_index_)
+{}
+
+// ** Class destructor: ********************************************************/
+
+MultiAlleleMonolocusGenotype::~MultiAlleleMonolocusGenotype()
+{
+  // Nothing to release by hand: allele_index_ is a std::vector member and
+  // frees its own storage when it is destroyed.
+}
+
+// ** Other methodes: **********************************************************/
+
+// Assignment operator: replace this genotype's allele indexes with those of mmg.
+// Vector assignment discards the previous content (the old push_back loop
+// appended to it) and is safe for self-assignment (the old loop never ended
+// when an object was assigned to itself, because the source kept growing).
+MultiAlleleMonolocusGenotype& MultiAlleleMonolocusGenotype::operator=(const MultiAlleleMonolocusGenotype& mmg)
+{
+  allele_index_ = mmg.allele_index_;
+  return *this;
+}
+
+// Equality operator: two genotypes are equal when they hold the same alleles
+// with the same multiplicities, whatever their order.
+// This keeps the order-insensitive semantics of the former two-allele test
+// but covers every allele, and no longer reads elements 0 and 1 of a vector
+// that may hold fewer than two entries.
+bool MultiAlleleMonolocusGenotype::operator==(const MultiAlleleMonolocusGenotype& mmg) const
+{
+  if (allele_index_.size() != mmg.allele_index_.size())
+    return false;
+
+  // Compare sorted copies so that allele order does not matter.
+  std::vector<size_t> lhs(allele_index_);
+  std::vector<size_t> rhs(mmg.allele_index_);
+  std::sort(lhs.begin(), lhs.end());
+  std::sort(rhs.begin(), rhs.end());
+  return lhs == rhs;
+}
+
+// Tell whether every stored allele index is identical to its neighbour.
+// A genotype holding zero or one allele is reported as homozygous.
+bool MultiAlleleMonolocusGenotype::isHomozygous() const
+{
+  const size_t count = allele_index_.size();
+  size_t pos = 1;
+  // Walk forward while adjacent alleles match.
+  while (pos < count && allele_index_[pos - 1] == allele_index_[pos])
+  {
+    ++pos;
+  }
+  // Reaching the end means no mismatch was met.
+  return !(pos < count);
+}
+
+// Return the allele indexes by value: the caller gets an independent copy.
+std::vector<size_t> MultiAlleleMonolocusGenotype::getAlleleIndex() const
+{
+  std::vector<size_t> snapshot(allele_index_);
+  return snapshot;
+}
+
+// Clonable interface: return a heap-allocated copy built with the copy constructor.
+MultiAlleleMonolocusGenotype* MultiAlleleMonolocusGenotype::clone() const
+{
+  MultiAlleleMonolocusGenotype* duplicate = new MultiAlleleMonolocusGenotype(*this);
+  return duplicate;
+}
+
diff --git a/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.h b/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.h
new file mode 100644
index 0000000..5088add
--- /dev/null
+++ b/src/Bpp/PopGen/MultiAlleleMonolocusGenotype.h
@@ -0,0 +1,121 @@
+//
+// File MultiAlleleMonolocusGenotype.h
+// Author : Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+// Last modification : Wednesday March 5 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+// Secured inclusion of header's file
+#ifndef _MULTIALLELEMONOLOCUSGENOTYPE_H_
+#define _MULTIALLELEMONOLOCUSGENOTYPE_H_
+
+// From STL
+#include <vector>
+
+#include <Bpp/Exceptions.h>
+
+// From local
+#include "MonolocusGenotype.h"
+
+namespace bpp
+{
+/**
+ * @brief The MultiAlleleMonolocusGenotype class.
+ *
+ * This class is intended to handle monolocus genotypes with many alleles,
+ * like polyploid loci or loci obtained from a trace file without cutoff on
+ * peaks or other filter.
+ *
+ * @author Sylvain Gaillard
+ */
+class MultiAlleleMonolocusGenotype :
+ public MonolocusGenotype
+{
+private:
+ // Allele indexes of this genotype.
+ std::vector<size_t> allele_index_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a monolocus genotype containing many alleles.
+ *
+ * @param allele_index The allele indexes to store in the genotype.
+ */
+ MultiAlleleMonolocusGenotype(std::vector<size_t> allele_index);
+
+ /**
+ * @brief Copy constructor.
+ */
+ MultiAlleleMonolocusGenotype(const MultiAlleleMonolocusGenotype& mmg);
+
+ /**
+ * @brief Destroy the MultiAlleleMonolocusGenotype.
+ */
+ ~MultiAlleleMonolocusGenotype();
+
+public:
+ // Other methods
+ /**
+ * @brief The assignment operator.
+ */
+ MultiAlleleMonolocusGenotype& operator=(const MultiAlleleMonolocusGenotype& mmg);
+
+ /**
+ * @brief The == operator.
+ */
+ bool operator==(const MultiAlleleMonolocusGenotype& mmg) const;
+
+ /**
+ * @brief Test the homozygosity of the locus (i.e. all alleles are identical).
+ */
+ bool isHomozygous() const;
+
+ /**
+ * @name The MonolocusGenotype interface:
+ *
+ * @{
+ */
+ std::vector<size_t> getAlleleIndex() const;
+ /** @} */
+
+ /**
+ * @name The Clonable interface:
+ *
+ * @{
+ */
+ MultiAlleleMonolocusGenotype* clone() const;
+ /** @} */
+};
+} // end of namespace bpp;
+
+#endif // _MULTIALLELEMONOLOCUSGENOTYPE_H_
+
diff --git a/src/Bpp/PopGen/MultiSeqIndividual.cpp b/src/Bpp/PopGen/MultiSeqIndividual.cpp
new file mode 100644
index 0000000..0f8a9e4
--- /dev/null
+++ b/src/Bpp/PopGen/MultiSeqIndividual.cpp
@@ -0,0 +1,499 @@
+//
+// File MultiSeqIndividual.cpp
+// Author : Sylvain Gaillard
+// Last modification : Tuesday August 03 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "MultiSeqIndividual.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Default constructor: empty id, sex 0, no date, coordinates, locality,
+// sequences or genotype.
+MultiSeqIndividual::MultiSeqIndividual() :
+  id_(),
+  sex_(0),
+  date_(0),
+  coord_(0),
+  locality_(0),
+  sequences_(),
+  genotype_(0)
+{}
+
+// Build an individual that only carries an identifier; every optional
+// attribute starts out unset.
+MultiSeqIndividual::MultiSeqIndividual(const std::string& id) :
+  id_(id),
+  sex_(0),
+  date_(0),
+  coord_(0),
+  locality_(0),
+  sequences_(),
+  genotype_(0)
+{}
+
+// Build an individual from its descriptive attributes.
+// date and coord are copied onto the heap; the locality pointer is stored as
+// given (no copy is made). No sequences or genotype are attached.
+MultiSeqIndividual::MultiSeqIndividual(const std::string& id, const Date& date, const Point2D<double>& coord, Locality<double>* locality, const unsigned short sex) :
+  id_(id),
+  sex_(sex),
+  date_(new Date(date)),
+  coord_(new Point2D<double>(coord)),
+  locality_(locality),
+  sequences_(),
+  genotype_(0)
+{}
+
+// Copy constructor: deep-copies the date, the coordinates, every sequence
+// container and the genotype; the locality pointer is shared with the source.
+//
+// Members are read directly rather than through getDate()/getCoord(): those
+// getters return freshly allocated copies, which the previous implementation
+// dereferenced and never freed, and they signal a missing value with an
+// exception that then had to be caught here.
+MultiSeqIndividual::MultiSeqIndividual(const MultiSeqIndividual& ind) :
+  id_(ind.id_),
+  sex_(ind.sex_),
+  date_(ind.date_ ? new Date(*ind.date_) : 0),
+  coord_(ind.coord_ ? new Point2D<double>(*ind.coord_) : 0),
+  locality_(ind.locality_),
+  sequences_(),
+  genotype_(0)
+{
+  map<string, VectorSequenceContainer*>::const_iterator it;
+  for (it = ind.sequences_.begin(); it != ind.sequences_.end(); ++it)
+  {
+    // Go through a pointer-to-const so the copy constructor is selected.
+    const VectorSequenceContainer* source = it->second;
+    sequences_[it->first] = source ? new VectorSequenceContainer(*source) : 0;
+  }
+  if (ind.genotype_)
+    genotype_ = new MultilocusGenotype(*ind.genotype_);
+}
+
+// ** Class destructor: *******************************************************/
+
+// Release everything this individual allocated itself: the date, the
+// coordinates, each sequence container and the genotype (the last two were
+// previously leaked). The locality is not deleted: it is only ever stored as
+// a pointer handed in by the caller, never allocated here.
+MultiSeqIndividual::~MultiSeqIndividual()
+{
+  delete date_;
+  delete coord_;
+  map<string, VectorSequenceContainer*>::iterator it;
+  for (it = sequences_.begin(); it != sequences_.end(); ++it)
+  {
+    delete it->second;
+  }
+  delete genotype_;
+}
+
+// ** Other methodes: *********************************************************/
+
+// Assignment operator: make this individual a deep copy of ind.
+//
+// Compared with the previous implementation this version
+//  - is a no-op on self-assignment,
+//  - frees the date, coordinates, sequence containers and genotype it
+//    replaces instead of orphaning them,
+//  - drops sequence sets that ind does not have,
+//  - no longer leaks the temporary copies returned by getDate()/getCoord().
+// The locality pointer is shared with ind, as before.
+MultiSeqIndividual& MultiSeqIndividual::operator=(const MultiSeqIndividual& ind)
+{
+  if (this == &ind)
+    return *this;
+
+  id_ = ind.id_;
+  sex_ = ind.sex_;
+
+  // Pointers are reset to 0 right after deletion so the object never holds a
+  // dangling pointer if a later allocation throws.
+  delete date_;
+  date_ = 0;
+  if (ind.date_)
+    date_ = new Date(*ind.date_);
+
+  delete coord_;
+  coord_ = 0;
+  if (ind.coord_)
+    coord_ = new Point2D<double>(*ind.coord_);
+
+  locality_ = ind.locality_;
+
+  map<string, VectorSequenceContainer*>::iterator old;
+  for (old = sequences_.begin(); old != sequences_.end(); ++old)
+  {
+    delete old->second;
+  }
+  sequences_.clear();
+  map<string, VectorSequenceContainer*>::const_iterator it;
+  for (it = ind.sequences_.begin(); it != ind.sequences_.end(); ++it)
+  {
+    // Go through a pointer-to-const so the copy constructor is selected.
+    const VectorSequenceContainer* source = it->second;
+    sequences_[it->first] = source ? new VectorSequenceContainer(*source) : 0;
+  }
+
+  delete genotype_;
+  genotype_ = 0;
+  if (ind.genotype_)
+    genotype_ = new MultilocusGenotype(*ind.genotype_);
+
+  return *this;
+}
+
+/******************************************************************************/
+
+// Id
+// Replace the identifier of this individual.
+void MultiSeqIndividual::setId(const std::string id)
+{
+  id_.assign(id);
+}
+
+/******************************************************************************/
+
+// Return a copy of the identifier.
+std::string MultiSeqIndividual::getId() const
+{
+  std::string identifier(id_);
+  return identifier;
+}
+
+/******************************************************************************/
+
+// Sex
+// Store the numeric code describing the sex of this individual.
+void MultiSeqIndividual::setSex(const unsigned short sex)
+{
+  this->sex_ = sex;
+}
+
+/******************************************************************************/
+
+// Return the numeric code describing the sex of this individual.
+unsigned short MultiSeqIndividual::getSex() const
+{
+  return this->sex_;
+}
+
+/******************************************************************************/
+
+// Date
+// Set the date of this individual (stored as a private heap copy).
+// When a date is already stored and does not differ from the new one,
+// the existing object is kept untouched.
+void MultiSeqIndividual::setDate(const Date& date)
+{
+  if (hasDate() && !(*date_ != date))
+    return;
+
+  // Deleting a null pointer is a no-op, so this also covers "no date yet".
+  delete date_;
+  date_ = new Date(date);
+}
+
+/******************************************************************************/
+
+// Return the date of this individual.
+// The returned pointer refers to a NEW heap-allocated copy of the stored
+// date, not to the internal object; nothing in this class frees it.
+// Throws NullPointerException when no date is set.
+const Date* MultiSeqIndividual::getDate() const throw (NullPointerException)
+{
+  if (!hasDate())
+    throw (NullPointerException("MultiSeqIndividual::getDate: no date associated to this individual."));
+
+  return new Date(*date_);
+}
+
+/******************************************************************************/
+
+// Tell whether a date is stored (the pointer is non-null).
+bool MultiSeqIndividual::hasDate() const
+{
+  return date_ ? true : false;
+}
+
+/******************************************************************************/
+
+// Coord
+// Set the coordinates of this individual (stored as a private heap copy).
+// When coordinates are already stored and do not differ from the new ones,
+// the existing object is kept untouched.
+void MultiSeqIndividual::setCoord(const Point2D<double>& coord)
+{
+  if (hasCoord() && !(*coord_ != coord))
+    return;
+
+  // Deleting a null pointer is a no-op, so this also covers "no coord yet".
+  delete coord_;
+  coord_ = new Point2D<double>(coord);
+}
+
+/******************************************************************************/
+
+// Set the coordinates of this individual from separate X and Y values.
+// When the stored point already has exactly these values it is kept;
+// otherwise a new point is allocated in its place.
+void MultiSeqIndividual::setCoord(const double x, const double y)
+{
+  if (hasCoord() && this->getX() == x && this->getY() == y)
+    return;
+
+  // Deleting a null pointer is a no-op, so this also covers "no coord yet".
+  delete coord_;
+  coord_ = new Point2D<double>(x, y);
+}
+
+/******************************************************************************/
+
+// Return the coordinates of this individual.
+// The returned pointer refers to a NEW heap-allocated copy of the stored
+// point, not to the internal object; nothing in this class frees it.
+// Throws NullPointerException when no coordinates are set.
+const Point2D<double>* MultiSeqIndividual::getCoord() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("MultiSeqIndividual::getCoord: no coord associated to this individual."));
+
+  return new Point2D<double>(*coord_);
+}
+
+/******************************************************************************/
+
+// Tell whether coordinates are stored (the pointer is non-null).
+bool MultiSeqIndividual::hasCoord() const
+{
+  return coord_ ? true : false;
+}
+
+/******************************************************************************/
+
+// Change the X coordinate of the stored point.
+// Throws NullPointerException when no coordinates are set.
+void MultiSeqIndividual::setX(const double x) throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("MultiSeqIndividual::setX: no coord associated to this individual."));
+
+  coord_->setX(x);
+}
+
+/******************************************************************************/
+
+// Change the Y coordinate of the stored point.
+// Throws NullPointerException when no coordinates are set.
+void MultiSeqIndividual::setY(const double y) throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("MultiSeqIndividual::setY: no coord associated to this individual."));
+
+  coord_->setY(y);
+}
+
+/******************************************************************************/
+
+// Return the X coordinate of the stored point.
+// Throws NullPointerException when no coordinates are set.
+double MultiSeqIndividual::getX() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("MultiSeqIndividual::getX: no coord associated to this individual."));
+
+  return coord_->getX();
+}
+
+/******************************************************************************/
+
+// Return the Y coordinate of the stored point.
+// Throws NullPointerException when no coordinates are set.
+double MultiSeqIndividual::getY() const throw (NullPointerException)
+{
+  if (!hasCoord())
+    throw (NullPointerException("MultiSeqIndividual::getY: no coord associated to this individual."));
+
+  return coord_->getY();
+}
+
+/******************************************************************************/
+
+// Locality
+// Attach a locality to this individual.
+// Only the pointer is stored; the Locality object itself is not copied.
+void MultiSeqIndividual::setLocality(const Locality<double>* locality)
+{
+  this->locality_ = locality;
+}
+
+/******************************************************************************/
+
+// Return the stored locality pointer (no copy is made).
+// Throws NullPointerException when no locality is set.
+const Locality<double>* MultiSeqIndividual::getLocality() const throw (NullPointerException)
+{
+  if (!hasLocality())
+    throw (NullPointerException("MultiSeqIndividual::getLocality: no locality associated to this individual."));
+
+  return locality_;
+}
+
+/******************************************************************************/
+
+// Tell whether a locality is attached (the pointer is non-null).
+bool MultiSeqIndividual::hasLocality() const
+{
+  return locality_ ? true : false;
+}
+
+/******************************************************************************/
+
+// Sequences
+// Return the sequence container stored under the given sequence-set id.
+// The pointer refers to the internal container (no copy is made).
+// Throws Exception when no sequence set is registered under id; the message
+// now names this method (it used to say "getSequence").
+const VectorSequenceContainer* MultiSeqIndividual::getVectorSequenceContainer(const std::string& id) const throw (Exception)
+{
+  map<string, VectorSequenceContainer*>::const_iterator it = sequences_.find(id);
+  if (it == sequences_.end())
+  {
+    throw (Exception("MultiSeqIndividual::getVectorSequenceContainer: sequence set not found (" + id + ")."));
+  }
+  return it->second;
+}
+
+/******************************************************************************/
+
+// Add a sequence to the sequence set registered under id.
+//
+// The set is created on first use, with the alphabet of the sequence being
+// added. Previously sequences_[id] inserted a null pointer for an unknown id
+// and dereferenced it at once, and no other method could create a set.
+//
+// Throws AlphabetMismatchException when the sequence's alphabet does not
+// match the container's, and BadIdentifierException for any other Exception
+// raised by the container (reported as a name already in use).
+void MultiSeqIndividual::addSequence(const std::string& id, const Sequence& sequence)
+throw (Exception)
+{
+  // operator[] value-initialises a missing entry to a null pointer.
+  VectorSequenceContainer*& container = sequences_[id];
+  if (container == 0)
+    container = new VectorSequenceContainer(sequence.getAlphabet());
+
+  try
+  {
+    container->addSequence(sequence);
+  }
+  catch (AlphabetMismatchException& ame)
+  {
+    throw (AlphabetMismatchException("MultiSeqIndividual::addSequence: alphabets don't match.", ame.getAlphabets()[0], ame.getAlphabets()[1]));
+  }
+  catch (Exception&)
+  {
+    throw (BadIdentifierException("MultiSeqIndividual::addSequence: sequence's name already in use.", sequence.getName()));
+  }
+}
+
+/******************************************************************************/
+
+// Return the sequence called name from the sequence set registered under id.
+// Throws Exception when the set does not exist. Exceptions raised by the
+// container lookup propagate unchanged: the former handler re-threw a copy
+// of the caught exception by value, which added nothing and would slice any
+// more-derived exception type.
+const Sequence& MultiSeqIndividual::getSequence(const std::string& id, const std::string& name)
+const throw (Exception)
+{
+  map<string, VectorSequenceContainer*>::const_iterator it = sequences_.find(id);
+  if (it == sequences_.end())
+  {
+    throw (Exception("MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."));
+  }
+  // Use a pointer-to-const so the const accessor of the container is called.
+  const VectorSequenceContainer* container = it->second;
+  return container->getSequence(name);
+}
+
+/******************************************************************************/
+
+// Return the i-th sequence from the sequence set registered under id.
+// Throws Exception when the set does not exist. Exceptions raised by the
+// container lookup propagate unchanged: the former handler re-threw a copy
+// of the caught exception by value, which added nothing and would slice any
+// more-derived exception type.
+const Sequence& MultiSeqIndividual::getSequence(const std::string& id, size_t i)
+const throw (Exception)
+{
+  map<string, VectorSequenceContainer*>::const_iterator it = sequences_.find(id);
+  if (it == sequences_.end())
+  {
+    throw (Exception("MultiSeqIndividual::getSequence: sequence set not found (" + id + ")."));
+  }
+  // Use a pointer-to-const so the const accessor of the container is called.
+  const VectorSequenceContainer* container = it->second;
+  return container->getSequence(i);
+}
+
+/******************************************************************************/
+
+// Collect the ids of all sequence sets, in the iteration order of the map.
+std::vector<std::string> MultiSeqIndividual::getSequencesKeys() const
+{
+  vector<string> ids;
+  map<string, VectorSequenceContainer*>::const_iterator pos = sequences_.begin();
+  while (pos != sequences_.end())
+  {
+    ids.push_back(pos->first);
+    ++pos;
+  }
+  return ids;
+}
+
+/******************************************************************************/
+
+// Tell whether at least one sequence set is registered.
+bool MultiSeqIndividual::hasSequences() const
+{
+  return !sequences_.empty();
+}
+
+/******************************************************************************/
+
+// Return how many sequence sets are registered.
+size_t MultiSeqIndividual::getNumberOfSequenceSet() const
+{
+  const size_t setCount = sequences_.size();
+  return setCount;
+}
+
+/******************************************************************************/
+
+// Return the number of sequences held by the sequence set registered under id.
+// Throws Exception when no sequence set is registered under id; the message
+// now names this method (it used to say "getSequence").
+size_t MultiSeqIndividual::getNumberOfSequences(const std::string& id) const
+throw (Exception)
+{
+  map<string, VectorSequenceContainer*>::const_iterator it = sequences_.find(id);
+  if (it == sequences_.end())
+  {
+    throw (Exception("MultiSeqIndividual::getNumberOfSequences: sequence set not found (" + id + ")."));
+  }
+  // Use a pointer-to-const so the const accessor of the container is called.
+  const VectorSequenceContainer* container = it->second;
+  return container->getNumberOfSequences();
+}
+
+/******************************************************************************/
+
+// MultilocusGenotype
+
+// Store a private copy of the given genotype, replacing any previous one.
+// The previous genotype is now freed (it used to be leaked). The copy is
+// made before the old object is deleted so the call stays valid when
+// genotype refers to the genotype currently stored.
+void MultiSeqIndividual::addGenotype(const MultilocusGenotype& genotype)
+{
+  MultilocusGenotype* replacement = new MultilocusGenotype(genotype);
+  delete genotype_;
+  genotype_ = replacement;
+}
+
+/******************************************************************************/
+
+// Return the stored genotype pointer as is (no copy is made).
+// Despite the exception specification, nothing is thrown here: the pointer
+// is simply null when no genotype has been added.
+const MultilocusGenotype* MultiSeqIndividual::getGenotype() const throw (NullPointerException)
+{
+  const MultilocusGenotype* stored = genotype_;
+  return stored;
+}
+
+/******************************************************************************/
+
+// Tell whether a genotype is stored (the pointer is non-null).
+bool MultiSeqIndividual::hasGenotype() const
+{
+  return genotype_ ? true : false;
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/MultiSeqIndividual.h b/src/Bpp/PopGen/MultiSeqIndividual.h
new file mode 100644
index 0000000..e90fcd7
--- /dev/null
+++ b/src/Bpp/PopGen/MultiSeqIndividual.h
@@ -0,0 +1,369 @@
+//
+// File MultiSeqIndividual.h
+// Author : Sylvain Gaillard
+// Last modification : Tuesday August 03 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _MULTISEQINDIVIDUAL_H_
+#define _MULTISEQINDIVIDUAL_H_
+
+// From STL
+#include <map>
+#include <vector>
+#include <string>
+
+#include <Bpp/Clonable.h>
+#include <Bpp/Exceptions.h>
+#include <Bpp/Graphics/Point2D.h>
+
+// From SeqLib
+#include <Bpp/Seq/Sequence.h>
+#include <Bpp/Seq/SequenceExceptions.h>
+#include <Bpp/Seq/Container/VectorSequenceContainer.h>
+
+// From PopGenLib
+#include "Locality.h"
+#include "Date.h"
+#include "MultilocusGenotype.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief <center><b>*** UNUSED CLASS ***</b></center>The MultiSeqIndividual class.
+ *
+ * <center><b>*** UNUSED CLASS ***</b></center>
+ * This class is designed to store data on a single individual.
+ * This individual can store numerous sequences for each place. It was the
+ * first working implementation, which manages sequences as a map of sequence
+ * containers. We have replaced it with a simpler individual with only one
+ * sequence per locus.
+ *
+ * @author Sylvain Gaillard
+ */
+class MultiSeqIndividual
+{
+private:
+ std::string id_;
+ unsigned short sex_;
+ // Null when no date is set.
+ Date* date_;
+ // Null when no coordinates are set.
+ Point2D<double>* coord_;
+ // Null when no locality is set; stored as the pointer given by the caller.
+ const Locality<double>* locality_;
+ // One sequence container per sequence-set id (i.e. locus).
+ std::map<std::string, VectorSequenceContainer*> sequences_;
+ // Null when no genotype is set.
+ MultilocusGenotype* genotype_;
+
+public:
+ // Constructors and destructor :
+ /**
+ * @brief Build an empty new MultiSeqIndividual.
+ */
+ MultiSeqIndividual();
+
+ /**
+ * @brief Build a new MultiSeqIndividual with an identifier.
+ */
+ MultiSeqIndividual(const std::string& id);
+
+ /**
+ * @brief Build a new MultiSeqIndividual with parameters.
+ *
+ * @param id The id of the MultiSeqIndividual as a string.
+ * @param date The date of the MultiSeqIndividual as a Date object.
+ * @param coord The coordinates of the MultiSeqIndividual as a Point2D object.
+ * @param locality The locality of the MultiSeqIndividual as a pointer to a Locality
+ * object.
+ * @param sex The sex of the MultiSeqIndividual as an unsigned short.
+ */
+ MultiSeqIndividual(const std::string& id,
+ const Date& date,
+ const Point2D<double>& coord,
+ Locality<double>* locality,
+ const unsigned short sex);
+
+ /**
+ * @brief The MultiSeqIndividual copy constructor.
+ */
+ MultiSeqIndividual(const MultiSeqIndividual& ind);
+
+ /**
+ * @brief Destroy a MultiSeqIndividual.
+ */
+ virtual ~MultiSeqIndividual();
+
+public:
+ // Methods
+ /**
+ * @brief The MultiSeqIndividual copy operator.
+ *
+ * @return A reference to the assigned MultiSeqIndividual.
+ * Make a copy of each attribute of the MultiSeqIndividual.
+ */
+ MultiSeqIndividual& operator=(const MultiSeqIndividual& ind);
+
+ /**
+ * @brief Set the id of the MultiSeqIndividual.
+ *
+ * @param id The id of the MultiSeqIndividual as a string.
+ */
+ void setId(const std::string id);
+
+ /**
+ * @brief Get the id of the MultiSeqIndividual.
+ *
+ * @return The id of the MultiSeqIndividual as a string.
+ */
+ std::string getId() const;
+
+ /**
+ * @brief Set the sex of the MultiSeqIndividual.
+ *
+ * @param sex An unsigned short coding for the sex.
+ */
+ void setSex(const unsigned short sex);
+
+ /**
+ * @brief Get the sex of the MultiSeqIndividual.
+ *
+ * @return The sex of the MultiSeqIndividual as an unsigned short.
+ */
+ unsigned short getSex() const;
+
+ /**
+ * @brief Set the date of the MultiSeqIndividual.
+ *
+ * @param date The date as a Date object.
+ */
+ void setDate(const Date& date);
+
+ /**
+ * @brief Get the date of the MultiSeqIndividual.
+ *
+ * @return A pointer to a newly allocated copy of the Date if the
+ * MultiSeqIndividual has a date (the class does not free this copy).
+ * Otherwise throw a NullPointerException.
+ */
+ const Date* getDate() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this MultiSeqIndividual has a date.
+ */
+ bool hasDate() const;
+
+ /**
+ * @brief Set the coordinates of the MultiSeqIndividual.
+ *
+ * @param coord A Point2D object.
+ */
+ void setCoord(const Point2D<double>& coord);
+
+ /**
+ * @brief Set the coordinates of the MultiSeqIndividual.
+ *
+ * @param x The X coordinate as a double.
+ * @param y The Y coordinate as a double.
+ */
+ void setCoord(const double x, const double y);
+
+ /**
+ * @brief Get the coordinates of the MultiSeqIndividual.
+ *
+ * @return A pointer to a newly allocated copy of the Point2D if the
+ * MultiSeqIndividual has coordinates (the class does not free this copy).
+ * Otherwise throw a NullPointerException.
+ */
+ const Point2D<double>* getCoord() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this MultiSeqIndividual has coordinates.
+ */
+ bool hasCoord() const;
+
+ /**
+ * @brief Set the X coordinate of the MultiSeqIndividual.
+ *
+ * @param x The X coordinate as a double.
+ *
+ * Set the X coordinate if the MultiSeqIndividual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ void setX(const double x) throw (NullPointerException);
+
+ /**
+ * @brief Set the Y coordinate of the MultiSeqIndividual.
+ *
+ * @param y The Y coordinate as a double.
+ *
+ * Set the Y coordinate if the MultiSeqIndividual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ void setY(const double y) throw (NullPointerException);
+
+ /**
+ * @brief Get the X coordinate of the MultiSeqIndividual.
+ *
+ * @return The X coordinate as a double if the MultiSeqIndividual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ double getX() const throw (NullPointerException);
+
+ /**
+ * @brief Get the Y coordinate of the MultiSeqIndividual.
+ *
+ * @return The Y coordinate as a double if the MultiSeqIndividual has coordinates.
+ * Otherwise throw a NullPointerException.
+ */
+ double getY() const throw (NullPointerException);
+
+ /**
+ * @brief Set the locality of the MultiSeqIndividual.
+ *
+ * @param locality A pointer to a Locality object (the pointer is stored, the object is not copied).
+ */
+ void setLocality(const Locality<double>* locality);
+
+ /**
+ * @brief Get the locality of the MultiSeqIndividual.
+ *
+ * @return A pointer to the Locality of the MultiSeqIndividual.
+ * Throw a NullPointerException if no locality is set.
+ */
+ const Locality<double>* getLocality() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if this MultiSeqIndividual has a locality.
+ */
+ bool hasLocality() const;
+
+ /**
+ * @brief Get a pointer to the VectorSequenceContainer at a named locus.
+ *
+ * @param id The id of the sequence set (i.e. locus).
+ * @throw Exception if no sequence set is stored under this id.
+ */
+ const VectorSequenceContainer* getVectorSequenceContainer(const std::string& id) const
+ throw (Exception);
+
+ /**
+ * @brief Add a sequence in a named sequence set.
+ *
+ * @param id The id of the sequence set.
+ * @param sequence The sequence to add.
+ * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
+ * @throw BadIdentifierException if sequence's name is already in use.
+ */
+ void addSequence(const std::string& id, const Sequence& sequence)
+ throw (Exception);
+
+ /**
+ * @brief Get a named sequence from a named sequence set.
+ *
+ * @param id The id of the sequence set.
+ * @param name The name of the sequence.
+ *
+ * @return A reference to the sequence.
+ * @throw Exception if no sequence set is stored under this id.
+ */
+ const Sequence& getSequence(const std::string& id, const std::string& name)
+ const throw (Exception);
+
+ /**
+ * @brief Get an indexed sequence from a named sequence set.
+ *
+ * @param id The id of the sequence set.
+ * @param i The index of the sequence in the sequence set.
+ *
+ * @return A reference to the sequence.
+ * @throw Exception if no sequence set is stored under this id.
+ */
+ const Sequence& getSequence(const std::string& id, const size_t i)
+ const throw (Exception);
+
+ /**
+ * @brief Get the sequence set ids.
+ *
+ * @return All the keys of the sequence sets in a vector.
+ */
+ std::vector<std::string> getSequencesKeys() const;
+
+ /**
+ * @brief Remove a named sequence from a named sequence set.
+ *
+ * NOTE(review): no definition of this method appears in
+ * MultiSeqIndividual.cpp as imported by this commit; calling it would
+ * presumably fail at link time. Confirm before use.
+ *
+ * @param id The id of the sequence set.
+ * @param name The name of the sequence.
+ *
+ * @return A pointer to a copy of the removed sequence.
+ */
+ Sequence* removeSequence(const std::string& id, const std::string& name);
+
+ /**
+ * @brief Delete a named sequence from a named sequence set.
+ *
+ * NOTE(review): no definition of this method appears in
+ * MultiSeqIndividual.cpp as imported by this commit; calling it would
+ * presumably fail at link time. Confirm before use.
+ *
+ * @param id The id of the sequence set.
+ * @param name The name of the sequence.
+ */
+ void deleteSequence(const std::string& id, const std::string& name);
+
+ /**
+ * @brief Tell if the MultiSeqIndividual has some sequences.
+ */
+ bool hasSequences() const;
+
+ /**
+ * @brief Count the number of sequence sets.
+ */
+ size_t getNumberOfSequenceSet() const;
+
+ /**
+ * @brief Get the number of sequences in a sequence set.
+ *
+ * @param id The id of the sequence set.
+ * @throw Exception if no sequence set is stored under this id.
+ */
+ size_t getNumberOfSequences(const std::string& id) const
+ throw (Exception);
+
+ /**
+ * @brief Add a genotype.
+ *
+ * @param genotype The MultilocusGenotype to add (a copy is stored).
+ */
+ void addGenotype(const MultilocusGenotype& genotype);
+
+ /**
+ * @brief Get the genotype.
+ *
+ * @return The stored pointer; the implementation returns it without any
+ * check, so it is null when no genotype has been added.
+ */
+ const MultilocusGenotype* getGenotype() const throw (NullPointerException);
+
+ /**
+ * @brief Tell if the MultiSeqIndividual has a MultilocusGenotype.
+ */
+ bool hasGenotype() const;
+};
+} // end of namespace bpp;
+
+#endif // _MULTISEQINDIVIDUAL_H_
+
diff --git a/src/Bpp/PopGen/MultilocusGenotype.cpp b/src/Bpp/PopGen/MultilocusGenotype.cpp
new file mode 100644
index 0000000..0b0eab5
--- /dev/null
+++ b/src/Bpp/PopGen/MultilocusGenotype.cpp
@@ -0,0 +1,203 @@
+//
+// File MultilocusGenotype.cpp
+// Author : Sylvain Gaillard <sylvain.gaillard at angers.inra.fr>
+// Last modification : April 4, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "MultilocusGenotype.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Class constructor: *******************************************************/
+
+// Build a genotype made of loci_number loci, all of them initially missing.
+MultilocusGenotype::MultilocusGenotype(size_t loci_number) throw (BadIntegerException) :
+  loci_(loci_number)
+{
+  // Sizing the vector value-initializes every slot to a null pointer, which
+  // is the representation of a missing locus, so no explicit fill is needed.
+  if (loci_number == 0)
+    throw BadIntegerException("MultilocusGenotype::MultilocusGenotype: loci_number must be > 0.", static_cast<int>(loci_number));
+}
+
+// Deep copy: each non-missing locus of the source is cloned.
+MultilocusGenotype::MultilocusGenotype(const MultilocusGenotype& genotype) :
+  loci_(genotype.size())
+{
+  const size_t nbLoci = genotype.size();
+  for (size_t pos = 0; pos < nbLoci; ++pos)
+  {
+    // Missing loci keep the null pointer the vector was created with.
+    if (genotype.isMonolocusGenotypeMissing(pos))
+      continue;
+    loci_[pos] = dynamic_cast<MonolocusGenotype*>(genotype.getMonolocusGenotype(pos).clone());
+  }
+}
+
+// ** Class destructor: *******************************************************/
+
+// Release every stored locus; the genotype owns the objects it points to.
+MultilocusGenotype::~MultilocusGenotype()
+{
+  for (vector<MonolocusGenotype*>::iterator locus = loci_.begin(); locus != loci_.end(); ++locus)
+  {
+    // Deleting the null pointer of a missing locus is a no-op.
+    delete *locus;
+  }
+  loci_.clear();
+}
+
+// ** Other methods: **********************************************************/
+
+// Store a clone of monogen at locus_position, replacing (and freeing) any
+// genotype previously stored there.
+//
+// Throws IndexOutOfBoundsException if locus_position is not a valid index.
+void MultilocusGenotype::setMonolocusGenotype(size_t locus_position,
+    const MonolocusGenotype& monogen) throw (IndexOutOfBoundsException)
+{
+  if (locus_position >= loci_.size())
+    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotype: locus_position out of bounds.",
+        locus_position, 0, loci_.size());
+
+  // Clone first: monogen may be a reference to the very object currently
+  // stored at this position, so it must still be alive while it is copied.
+  MonolocusGenotype* replacement = dynamic_cast<MonolocusGenotype*>(monogen.clone());
+
+  // Free the previous occupant (no-op if the locus was missing). Simply
+  // overwriting the pointer would leak it.
+  delete loci_[locus_position];
+  loci_[locus_position] = replacement;
+}
+
+// Build a MonolocusGenotype from allele keys and store it at locus_position.
+//
+// Throws Exception if allele_keys is empty, and IndexOutOfBoundsException if
+// locus_position is not a valid index.
+void MultilocusGenotype::setMonolocusGenotypeByAlleleKey(size_t locus_position,
+    const std::vector<size_t>& allele_keys) throw (Exception)
+{
+  if (allele_keys.size() < 1)
+    throw Exception("MultilocusGenotype::setMonolocusGenotypeByAlleleKey: no key in allele_keys.");
+
+  // The message now names this function (it previously reported
+  // setMonolocusGenotype, which made the failure hard to trace).
+  if (locus_position >= loci_.size())
+    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotypeByAlleleKey: locus_position out of bounds.",
+        locus_position, 0, loci_.size());
+
+  // NOTE(review): the object returned by the builder is only dereferenced
+  // here and setMonolocusGenotype() stores a clone of it. If the builder
+  // returns a raw owning pointer, that temporary is never freed - confirm
+  // the return type in MonolocusGenotypeTools.
+  setMonolocusGenotype(locus_position, *MonolocusGenotypeTools::buildMonolocusGenotypeByAlleleKey(allele_keys));
+}
+
+// Resolve each allele id to its key in locus_info, then store the matching
+// genotype at locus_position. Exceptions raised underneath are re-thrown
+// with a message naming this function.
+void MultilocusGenotype::setMonolocusGenotypeByAlleleId(size_t locus_position,
+    const std::vector<std::string>& allele_id, const LocusInfo& locus_info) throw (Exception)
+{
+  vector<size_t> keys;
+  for (vector<string>::const_iterator id = allele_id.begin(); id != allele_id.end(); ++id)
+  {
+    try
+    {
+      keys.push_back(locus_info.getAlleleInfoKey(*id));
+    }
+    catch (AlleleNotFoundException& anfe)
+    {
+      throw AlleleNotFoundException("MultilocusGenotype::setMonolocusGenotypeByAlleleId: id not found.", anfe.getIdentifier());
+    }
+  }
+
+  try
+  {
+    setMonolocusGenotypeByAlleleKey(locus_position, keys);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotypeByAlleleId: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+}
+
+// Mark the locus at locus_position as missing, freeing whatever genotype
+// was stored there.
+void MultilocusGenotype::setMonolocusGenotypeAsMissing(size_t locus_position) throw (IndexOutOfBoundsException)
+{
+  if (!(locus_position < loci_.size()))
+    throw IndexOutOfBoundsException("MultilocusGenotype::setMonolocusGenotypeAsMissing: locus_position out of bounds.", locus_position, 0, loci_.size());
+
+  // delete accepts a null pointer, so an already-missing locus needs no
+  // special case.
+  MonolocusGenotype*& slot = loci_[locus_position];
+  delete slot;
+  slot = NULL;
+}
+
+// Tell whether the locus at locus_position holds no genotype (null slot).
+bool MultilocusGenotype::isMonolocusGenotypeMissing(size_t locus_position) const throw (IndexOutOfBoundsException)
+{
+  if (locus_position < loci_.size())
+    return loci_[locus_position] == NULL;
+
+  throw IndexOutOfBoundsException("MultilocusGenotype::isMonolocusGenotypeMissing: locus_position out of bounds.", locus_position, 0, loci_.size());
+}
+
+// Return a reference to the genotype stored at locus_position.
+//
+// Throws IndexOutOfBoundsException if locus_position is not a valid index.
+//
+// NOTE(review): a missing locus is stored as a null pointer and is
+// dereferenced here without a check, which is undefined behaviour. Callers
+// must test isMonolocusGenotypeMissing(locus_position) first. The exception
+// specification only allows IndexOutOfBoundsException, so reporting the
+// condition differently would require changing the declaration as well.
+const MonolocusGenotype& MultilocusGenotype::getMonolocusGenotype(size_t locus_position) const throw (IndexOutOfBoundsException)
+{
+ if (locus_position >= loci_.size())
+ throw IndexOutOfBoundsException("MultilocusGenotype::getMonolocusGenotype: locus_position out of bounds", locus_position, 0, loci_.size());
+ return *loci_[locus_position];
+}
+
+// Number of loci (missing ones included).
+size_t MultilocusGenotype::size() const
+{
+  const size_t nbLoci = loci_.size();
+  return nbLoci;
+}
+
+// Number of loci that actually hold a genotype (non-null slots).
+size_t MultilocusGenotype::countNonMissingLoci() const
+{
+  size_t present = 0;
+  for (vector<MonolocusGenotype*>::const_iterator locus = loci_.begin(); locus != loci_.end(); ++locus)
+  {
+    if (*locus == NULL)
+      continue;
+    ++present;
+  }
+  return present;
+}
+
+// Number of loci holding a bi-allelic genotype that is homozygous.
+// Missing loci and loci that are not BiAlleleMonolocusGenotype are ignored.
+size_t MultilocusGenotype::countHomozygousLoci() const
+{
+  size_t count = 0;
+  for (size_t i = 0; i < loci_.size(); i++)
+  {
+    // A dynamic_cast on a pointer does not throw: it yields NULL when the
+    // locus is missing or is not bi-allelic. The result must therefore be
+    // tested; a catch (...) cannot intercept a null dereference.
+    const BiAlleleMonolocusGenotype* biallelic = dynamic_cast<const BiAlleleMonolocusGenotype*>(loci_[i]);
+    if (biallelic != NULL && biallelic->isHomozygous())
+      count++;
+  }
+  return count;
+}
+
+// Number of loci holding a bi-allelic genotype that is heterozygous.
+// Missing loci and loci that are not BiAlleleMonolocusGenotype are ignored.
+size_t MultilocusGenotype::countHeterozygousLoci() const
+{
+  size_t count = 0;
+  for (size_t i = 0; i < loci_.size(); i++)
+  {
+    // A dynamic_cast on a pointer does not throw: it yields NULL when the
+    // locus is missing or is not bi-allelic. The result must therefore be
+    // tested; a catch (...) cannot intercept a null dereference.
+    const BiAlleleMonolocusGenotype* biallelic = dynamic_cast<const BiAlleleMonolocusGenotype*>(loci_[i]);
+    if (biallelic != NULL && !biallelic->isHomozygous())
+      count++;
+  }
+  return count;
+}
+
diff --git a/src/Bpp/PopGen/MultilocusGenotype.h b/src/Bpp/PopGen/MultilocusGenotype.h
new file mode 100644
index 0000000..ea8433a
--- /dev/null
+++ b/src/Bpp/PopGen/MultilocusGenotype.h
@@ -0,0 +1,159 @@
+//
+// File MultilocusGenotype.h
+// Author : Sylvain Gaillard
+// Last modification : April 4, 2008
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _MULTILOCUSGENOTYPE_H_
+#define _MULTILOCUSGENOTYPE_H_
+
+// From STL
+#include <vector>
+#include <string>
+
+#include <Bpp/Exceptions.h>
+
+// From Pop
+#include "MonolocusGenotype.h"
+#include "MonolocusGenotypeTools.h"
+#include "BiAlleleMonolocusGenotype.h"
+#include "MonoAlleleMonolocusGenotype.h"
+#include "LocusInfo.h"
+
+namespace bpp
+{
+/**
+ * @brief The MultilocusGenotype class.
+ *
+ * This is a MonolocusGenotype container.
+ *
+ * @author Sylvain Gaillard
+ */
+class MultilocusGenotype
+{
+private:
+  // One slot per locus. The genotype owns the pointed-to objects (they are
+  // deleted by the destructor); a null pointer marks a missing locus.
+  std::vector<MonolocusGenotype*> loci_;
+
+public:
+  // Constructors and Destructor
+  /**
+   * @brief Build a MultilocusGenotype with a given number of loci.
+   *
+   * @param loci_number The number of loci.
+   * @throw BadIntegerException if loci_number < 1.
+   */
+  MultilocusGenotype(size_t loci_number) throw (BadIntegerException);
+
+  /**
+   * @brief Copy constructor.
+   *
+   * Performs a deep copy: every non-missing MonolocusGenotype is cloned.
+   */
+  MultilocusGenotype(const MultilocusGenotype& genotype);
+
+  /**
+   * @brief Assignment operator.
+   *
+   * Performs a deep copy through copy-and-swap. Without it, the implicitly
+   * generated operator would copy the raw pointers, and both objects would
+   * delete the same MonolocusGenotype instances on destruction.
+   * Self-assignment is safe.
+   */
+  MultilocusGenotype& operator=(const MultilocusGenotype& genotype)
+  {
+    MultilocusGenotype tmp(genotype);
+    // tmp now owns the previous content and frees it when it goes out of scope.
+    loci_.swap(tmp.loci_);
+    return *this;
+  }
+
+  /**
+   * @brief Destroy a MultilocusGenotype.
+   *
+   * Deletes every stored MonolocusGenotype.
+   */
+  ~MultilocusGenotype();
+
+public:
+  /**
+   * @brief Set a MonolocusGenotype.
+   *
+   * A clone of monogen is stored at locus_position.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   */
+  void setMonolocusGenotype(size_t locus_position,
+      const MonolocusGenotype& monogen) throw (IndexOutOfBoundsException);
+
+  /**
+   * @brief Set a MonolocusGenotype by allele keys.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   * @throw Exception if there is no key in allele_keys.
+   */
+  void setMonolocusGenotypeByAlleleKey(size_t locus_position,
+      const std::vector<size_t>& allele_keys) throw (Exception);
+
+  /**
+   * @brief Set a MonolocusGenotype by allele id.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   * @throw AlleleNotFoundException if at least one of the ids is not found in the LocusInfo.
+   */
+  void setMonolocusGenotypeByAlleleId(size_t locus_position,
+      const std::vector<std::string>& allele_id, const LocusInfo& locus_info) throw (Exception);
+
+  /**
+   * @brief Set a MonolocusGenotype as missing data.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   */
+  void setMonolocusGenotypeAsMissing(size_t locus_position) throw (IndexOutOfBoundsException);
+
+  /**
+   * @brief Tell if a MonolocusGenotype is a missing data.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   */
+  bool isMonolocusGenotypeMissing(size_t locus_position) const throw (IndexOutOfBoundsException);
+
+  /**
+   * @brief Get a MonolocusGenotype.
+   *
+   * The locus must not be missing: check isMonolocusGenotypeMissing() first.
+   *
+   * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci.
+   */
+  const MonolocusGenotype& getMonolocusGenotype(size_t locus_position) const
+  throw (IndexOutOfBoundsException);
+
+  /**
+   * @brief Count the number of loci.
+   *
+   * Return the size of loci_ (missing loci included).
+   */
+  size_t size() const;
+
+  /**
+   * @brief Count the number of non missing MonolocusGenotype.
+   */
+  size_t countNonMissingLoci() const;
+
+  /**
+   * @brief Count the number of homozygous MonolocusGenotype.
+   */
+  size_t countHomozygousLoci() const;
+
+  /**
+   * @brief Count the number of heterozygous MonolocusGenotype.
+   */
+  size_t countHeterozygousLoci() const;
+};
+} // end of namespace bpp;
+
+#endif // _MULTILOCUSGENOTYPE_H_
+
diff --git a/src/Bpp/PopGen/MultilocusGenotypeStatistics.cpp b/src/Bpp/PopGen/MultilocusGenotypeStatistics.cpp
new file mode 100644
index 0000000..a3d5f76
--- /dev/null
+++ b/src/Bpp/PopGen/MultilocusGenotypeStatistics.cpp
@@ -0,0 +1,816 @@
+/*
+ * File MultilocusGenotypeStatistics.cpp
+ * Author : Sylvain Gaillard <yragael2001 at yahoo.fr>
+ * Last modification : Wednesday August 04 2004
+ *
+ */
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include <Bpp/Utils/MapTools.h>
+
+#include "MultilocusGenotypeStatistics.h"
+#include "PolymorphismMultiGContainerTools.h"
+
+using namespace bpp;
+
+// From STL
+
+#include <iostream>
+#include <cmath>
+#include <algorithm>
+
+using namespace std;
+
+// List the allele ids found at locus_position in the selected groups, i.e.
+// the keys of the map built by getAllelesMapForGroups().
+vector<size_t> MultilocusGenotypeStatistics::getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  map<size_t, size_t> countsByAllele;
+  try
+  {
+    countsByAllele = getAllelesMapForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    // Re-throw with a message naming this function.
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesIdsForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+
+  vector<size_t> alleleIds = MapTools::getKeys(countsByAllele);
+  return alleleIds;
+}
+
+// Total number of allele copies observed at locus_position in the selected
+// groups (sum of the per-allele counts).
+size_t MultilocusGenotypeStatistics::countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  map<size_t, size_t> copies;
+  try
+  {
+    copies = getAllelesMapForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countGametesForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+
+  size_t total = 0;
+  for (map<size_t, size_t>::const_iterator it = copies.begin(); it != copies.end(); ++it)
+  {
+    total += it->second;
+  }
+  return total;
+}
+
+// Count, for each allele index, how many copies are carried at
+// locus_position by the individuals belonging to the selected groups.
+// Individuals whose genotype is missing at that locus are skipped.
+map<size_t, size_t> MultilocusGenotypeStatistics::getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  map<size_t, size_t> tally;
+  for (size_t ind = 0; ind < pmgc.size(); ++ind)
+  {
+    try
+    {
+      // The missing-data test comes first so that an out-of-range locus is
+      // reported whatever the group of the individual.
+      if (pmgc.getMultilocusGenotype(ind)->isMonolocusGenotypeMissing(locus_position) || groups.find(pmgc.getGroupId(ind)) == groups.end())
+        continue;
+
+      const vector<size_t> alleles = pmgc.getMultilocusGenotype(ind)->getMonolocusGenotype(locus_position).getAlleleIndex();
+      for (vector<size_t>::const_iterator allele = alleles.begin(); allele != alleles.end(); ++allele)
+      {
+        ++tally[*allele];
+      }
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesMapForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  return tally;
+}
+
+// Allele frequencies at locus_position in the selected groups: each allele
+// count divided by the total number of allele copies.
+//
+// Throws ZeroDivisionException when no allele copy is found.
+map<size_t, double> MultilocusGenotypeStatistics::getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, size_t> counts;
+  try
+  {
+    counts = getAllelesMapForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesFrqForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+
+  size_t totalCopies = 0;
+  for (map<size_t, size_t>::const_iterator it = counts.begin(); it != counts.end(); ++it)
+  {
+    totalCopies += it->second;
+  }
+  if (totalCopies == 0)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFrqForGroups.");
+
+  map<size_t, double> frequencies;
+  const double denominator = static_cast<double>(totalCopies);
+  for (map<size_t, size_t>::const_iterator it = counts.begin(); it != counts.end(); ++it)
+  {
+    frequencies[it->first] = static_cast<double>(it->second) / denominator;
+  }
+  return frequencies;
+}
+
+// Number of individuals of the selected groups whose genotype is not
+// missing at locus_position.
+//
+// Throws IndexOutOfBoundsException if locus_position is out of range.
+size_t MultilocusGenotypeStatistics::countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  size_t counter = 0;
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    try
+    {
+      if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()))
+        counter++;
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      // The message names this function (it used to report the stale name
+      // "countNonMissing").
+      throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countNonMissingForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  return counter;
+}
+
+// Number of individuals of the selected groups whose genotype at
+// locus_position is present and carries exactly two allele indices.
+//
+// Throws IndexOutOfBoundsException if locus_position is out of range.
+size_t MultilocusGenotypeStatistics::countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  size_t counter = 0;
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    try
+    {
+      if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()))
+      {
+        if ((pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position).getAlleleIndex()).size() == 2)
+          counter++;
+      }
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      // The message names this function (it used to report the stale name
+      // "countBiAllelic").
+      throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countBiAllelicForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  return counter;
+}
+
+// For each allele index, count the heterozygous bi-allelic genotypes that
+// carry it at locus_position in the selected groups. Every heterozygote
+// therefore contributes to two entries of the returned map.
+//
+// Throws IndexOutOfBoundsException if locus_position is out of range.
+map<size_t, size_t> MultilocusGenotypeStatistics::countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (IndexOutOfBoundsException)
+{
+  map<size_t, size_t> counter;
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    try
+    {
+      if (!pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locus_position) && (groups.find(pmgc.getGroupId(i)) != groups.end()))
+      {
+        const MonolocusGenotype& tmp_mg = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(locus_position);
+        // Only genotypes with exactly two allele indices are cast to
+        // BiAlleleMonolocusGenotype and tested for heterozygosity.
+        if ((tmp_mg.getAlleleIndex()).size() == 2)
+        {
+          if (!dynamic_cast<const BiAlleleMonolocusGenotype&>(tmp_mg).isHomozygous())
+          {
+            vector<size_t> tmp_alleles = tmp_mg.getAlleleIndex();
+            for (size_t j = 0; j < tmp_alleles.size(); j++)
+            {
+              counter[tmp_alleles[j]]++;
+            }
+          }
+        }
+      }
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      // The message names this function (it used to report the stale name
+      // "countHeterozygous").
+      throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countHeterozygousForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  return counter;
+}
+
+// For each allele index, the number of heterozygous bi-allelic genotypes
+// carrying it divided by the number of bi-allelic genotypes (homozygous or
+// not) observed at locus_position in the selected groups.
+//
+// Throws ZeroDivisionException when no bi-allelic genotype is found.
+map<size_t, double> MultilocusGenotypeStatistics::getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, double> carriers;
+  size_t nbBiAllelic = 0;
+  for (size_t ind = 0; ind < pmgc.size(); ++ind)
+  {
+    try
+    {
+      if (pmgc.getMultilocusGenotype(ind)->isMonolocusGenotypeMissing(locus_position) || groups.find(pmgc.getGroupId(ind)) == groups.end())
+        continue;
+
+      const MonolocusGenotype& mono = pmgc.getMultilocusGenotype(ind)->getMonolocusGenotype(locus_position);
+      if (mono.getAlleleIndex().size() != 2)
+        continue;
+
+      ++nbBiAllelic;
+      if (dynamic_cast<const BiAlleleMonolocusGenotype&>(mono).isHomozygous())
+        continue;
+
+      // A heterozygote counts once for each of its two alleles.
+      const vector<size_t> alleles = mono.getAlleleIndex();
+      for (vector<size_t>::const_iterator allele = alleles.begin(); allele != alleles.end(); ++allele)
+      {
+        carriers[*allele] += 1.;
+      }
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  if (nbBiAllelic == 0)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups.");
+
+  const double denominator = static_cast<double>(nbBiAllelic);
+  for (map<size_t, double>::iterator it = carriers.begin(); it != carriers.end(); ++it)
+  {
+    it->second = it->second / denominator;
+  }
+  return carriers;
+}
+
+// Observed heterozygosity at locus_position in the selected groups: the
+// proportion of heterozygotes among the bi-allelic genotypes.
+//
+// Throws IndexOutOfBoundsException if locus_position is out of range and
+// ZeroDivisionException when no bi-allelic genotype is found.
+double MultilocusGenotypeStatistics::getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, double> heterozygous_frq;
+  double frq = 0.;
+  try
+  {
+    heterozygous_frq = getHeterozygousFrqForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHobsForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  catch (ZeroDivisionException& zde)
+  {
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getHobsForGroups.");
+  }
+  for (map<size_t, double>::iterator it = heterozygous_frq.begin(); it != heterozygous_frq.end(); it++)
+  {
+    frq += it->second;
+  }
+  // getHeterozygousFrqForGroups() credits every heterozygote to both of its
+  // alleles, so the sum over alleles counts each heterozygote exactly twice.
+  // Dividing by the number of alleles instead (as was done before) is only
+  // right when exactly two alleles occur in heterozygotes: it gives 0/0 = NaN
+  // when there is no heterozygote and underestimates with three alleles or more.
+  return frq / 2.;
+}
+
+// Expected heterozygosity at locus_position in the selected groups:
+// one minus the sum of the squared allele frequencies.
+double MultilocusGenotypeStatistics::getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, double> frequencies;
+  try
+  {
+    frequencies = getAllelesFrqForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHexpForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  catch (ZeroDivisionException& zde)
+  {
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getHexpForGroups.");
+  }
+
+  double sumOfSquares = 0.;
+  for (map<size_t, double>::const_iterator it = frequencies.begin(); it != frequencies.end(); ++it)
+  {
+    const double p = it->second;
+    sumOfSquares += p * p;
+  }
+  return 1 - sumOfSquares;
+}
+
+// Bias-corrected expected heterozygosity at locus_position in the selected
+// groups: Hexp scaled by 2N / (2N - 1), N being the value returned by
+// countGametesForGroups().
+//
+// NOTE(review): countGametesForGroups() sums allele copies, so N already
+// looks like a gamete count rather than a number of individuals; if so the
+// factor should probably be N / (N - 1). Confirm against the intended
+// estimator before changing it.
+double MultilocusGenotypeStatistics::getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  size_t nbCopies;
+  double expected;
+  try
+  {
+    nbCopies = countGametesForGroups(pmgc, locus_position, groups);
+    expected = getHexpForGroups(pmgc, locus_position, groups);
+  }
+  catch (IndexOutOfBoundsException& ioobe)
+  {
+    throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHnbForGroups: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+  }
+  catch (ZeroDivisionException& zde)
+  {
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getHnbForGroups.");
+  }
+
+  const double copies = static_cast<double>(nbCopies);
+  const double denominator = static_cast<double>((2 * nbCopies) - 1);
+  return 2 * copies * expected / denominator;
+}
+
+// Distance between groups grp1 and grp2 computed over locus_positions as
+// -log(Jxy / sqrt(Jx * Jy)), where Jx, Jy and Jxy are the sums over loci and
+// alleles of x^2, y^2 and x*y, x and y being the allele frequencies in grp1
+// and grp2.
+//
+// Exceptions raised while collecting the frequencies (out-of-range locus,
+// group without data) propagate unchanged. Throws ZeroDivisionException when
+// Jx * Jy is zero.
+double MultilocusGenotypeStatistics::getDnei72(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, size_t grp1, size_t grp2) throw (Exception)
+{
+  set<size_t> group1_id;
+  set<size_t> group2_id;
+  set<size_t> groups_id;
+  double Jx = 0.;
+  double Jy = 0.;
+  double Jxy = 0.;
+  group1_id.insert(grp1);
+  group2_id.insert(grp2);
+  groups_id.insert(grp1);
+  groups_id.insert(grp2);
+  for (size_t i = 0; i < locus_positions.size(); i++)
+  {
+    // No try/catch here: the former "catch (Exception& e) { throw e; }"
+    // re-threw a copy sliced to the base class, hiding the real exception
+    // type (IndexOutOfBoundsException, ZeroDivisionException...) from callers.
+    vector<size_t> allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id);
+    map<size_t, double> allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id);
+    map<size_t, double> allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id);
+
+    for (size_t j = 0; j < allele_ids.size(); j++)
+    {
+      // An allele absent from a group has frequency 0 in that group.
+      map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
+      map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
+      double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
+      double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
+      Jx += tmp_frq1 * tmp_frq1;
+      Jy += tmp_frq2 * tmp_frq2;
+      Jxy += tmp_frq1 * tmp_frq2;
+    }
+  }
+  if (Jx * Jy == 0.)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei72.");
+  return -log(Jxy / sqrt(Jx * Jy));
+}
+
+// Sample-size-corrected variant of getDnei72(): for every locus the sums of
+// squared frequencies of grp1 and grp2 are replaced by
+// (2n * sum - 1) / (2n - 1), n being the number of bi-allelic genotypes of
+// the group at that locus; the result is -log(Jxy / sqrt(Jx * Jy)).
+//
+// Exceptions raised while collecting the frequencies (out-of-range locus,
+// group without data) propagate unchanged. Throws ZeroDivisionException when
+// Jx * Jy is zero.
+double MultilocusGenotypeStatistics::getDnei78(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, size_t grp1, size_t grp2) throw (Exception)
+{
+  set<size_t> group1_id;
+  set<size_t> group2_id;
+  set<size_t> groups_id;
+  double Jx = 0.;
+  double Jy = 0.;
+  double Jxy = 0.;
+  group1_id.insert(grp1);
+  group2_id.insert(grp2);
+  groups_id.insert(grp1);
+  groups_id.insert(grp2);
+  for (size_t i = 0; i < locus_positions.size(); i++)
+  {
+    // No try/catch here: the former "catch (Exception& e) { throw e; }"
+    // re-threw a copy sliced to the base class, hiding the real exception
+    // type (IndexOutOfBoundsException, ZeroDivisionException...) from callers.
+    vector<size_t> allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id);
+    map<size_t, double> allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id);
+    map<size_t, double> allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id);
+    size_t nx = countBiAllelicForGroups(pmgc, locus_positions[i], group1_id);
+    size_t ny = countBiAllelicForGroups(pmgc, locus_positions[i], group2_id);
+
+    double tmp_Jx = 0.;
+    double tmp_Jy = 0.;
+    for (size_t j = 0; j < allele_ids.size(); j++)
+    {
+      // An allele absent from a group has frequency 0 in that group.
+      map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
+      map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
+      double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
+      double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
+      tmp_Jx += tmp_frq1 * tmp_frq1;
+      tmp_Jy += tmp_frq2 * tmp_frq2;
+      Jxy += tmp_frq1 * tmp_frq2;
+    }
+    // Per-locus sample-size correction; 2n - 1 is never zero for an integer n.
+    Jx += ((2. * static_cast<double>(nx) * tmp_Jx) - 1.) / ((2. * static_cast<double>(nx)) - 1.);
+    Jy += ((2. * static_cast<double>(ny) * tmp_Jy) - 1.) / ((2. * static_cast<double>(ny)) - 1.);
+  }
+  double denom = Jx * Jy;
+  if (denom == 0.)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei78.");
+  return -log(Jxy / sqrt(denom));
+}
+
+// Per-allele F-statistics at locus_position for the selected groups, derived
+// from the variance components a, b and c of getVarianceComponents():
+//   Fit = 1 - c / (a + b + c), Fst = a / (a + b + c), Fis = 1 - c / (b + c).
+// A statistic whose denominator is zero is reported as NAN instead of
+// raising an exception.
+map<size_t, MultilocusGenotypeStatistics::Fstats> MultilocusGenotypeStatistics::getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, MultilocusGenotypeStatistics::VarComp> vc = getVarianceComponents(pmgc, locus_position, groups);
+  map<size_t, MultilocusGenotypeStatistics::Fstats> f_stats;
+  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = vc.begin(); it != vc.end(); it++)
+  {
+    double abc = it->second.a + it->second.b + it->second.c;
+    double bc = it->second.b + it->second.c;
+
+    if (abc == 0)
+    {
+      f_stats[it->first].Fit = NAN;
+      f_stats[it->first].Fst = NAN;
+    }
+    else
+    {
+      // This "else" was missing: the divisions ran unconditionally and
+      // overwrote the NAN values above with the result of a division by zero.
+      f_stats[it->first].Fit = 1. - it->second.c / abc;
+      f_stats[it->first].Fst = it->second.a / abc;
+    }
+    if (bc == 0)
+      f_stats[it->first].Fis = NAN;
+    else
+      f_stats[it->first].Fis = 1. - it->second.c / bc;
+  }
+  return f_stats;
+}
+
+// Per-allele Fit at locus_position for the selected groups:
+// 1 - c / (a + b + c), from the variance components a, b and c.
+//
+// Throws ZeroDivisionException when a + b + c is zero for an allele.
+map<size_t, double> MultilocusGenotypeStatistics::getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, MultilocusGenotypeStatistics::VarComp> components = getVarianceComponents(pmgc, locus_position, groups);
+  map<size_t, double> Fit;
+  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = components.begin(); it != components.end(); ++it)
+  {
+    const MultilocusGenotypeStatistics::VarComp& vc = it->second;
+    const double total = vc.a + vc.b + vc.c;
+    if (total == 0.)
+      throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFit.");
+    Fit[it->first] = 1. - vc.c / total;
+  }
+  return Fit;
+}
+
+// Per-allele Fst at locus_position for the selected groups:
+// a / (a + b + c), from the variance components a, b and c.
+//
+// Throws BadIntegerException when fewer than two groups are given and
+// ZeroDivisionException when a + b + c is zero for an allele.
+map<size_t, double> MultilocusGenotypeStatistics::getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  if (groups.size() < 2)
+    throw BadIntegerException("MultilocusGenotypeStatistics::getAllelesFst: groups must be >= 2.", static_cast<int>(groups.size()));
+
+  map<size_t, MultilocusGenotypeStatistics::VarComp> components = getVarianceComponents(pmgc, locus_position, groups);
+  map<size_t, double> Fst;
+  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = components.begin(); it != components.end(); ++it)
+  {
+    const MultilocusGenotypeStatistics::VarComp& vc = it->second;
+    const double total = vc.a + vc.b + vc.c;
+    if (total == 0.)
+      throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFst.");
+    Fst[it->first] = vc.a / total;
+  }
+  return Fst;
+}
+
+// Per-allele Fis at locus_position for the selected groups:
+// 1 - c / (b + c), from the variance components b and c.
+//
+// Throws ZeroDivisionException when b + c is zero for an allele.
+map<size_t, double> MultilocusGenotypeStatistics::getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (Exception)
+{
+  map<size_t, MultilocusGenotypeStatistics::VarComp> components = getVarianceComponents(pmgc, locus_position, groups);
+  map<size_t, double> Fis;
+  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = components.begin(); it != components.end(); ++it)
+  {
+    const MultilocusGenotypeStatistics::VarComp& vc = it->second;
+    const double within = vc.b + vc.c;
+    if (within == 0.)
+      throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFis.");
+    Fis[it->first] = 1. - vc.c / within;
+  }
+  return Fis;
+}
+
+// Per-allele variance components a, b and c at locus_position for the
+// selected groups, following the formulas coded below:
+//   r     number of groups
+//   ni    size of group i at this locus (getLocusGroupSize)
+//   nbar  mean of the ni
+//   nc    (r * nbar - sum(ni^2) / (r * nbar)) / (r - 1)
+//   pbar  allele frequency averaged over groups, weighted by ni
+//   s2    sum(ni * (pi - pbar)^2) / ((r - 1) * nbar)
+//   hbar  heterozygote frequency averaged over groups, weighted by ni
+//
+// With a single group there is no between-group variance: s2 and a are set
+// to 0 (the general formulas would divide by r - 1 = 0 and by nc = 0), so
+// that b and c remain usable.
+//
+// Throws ZeroDivisionException when nbar <= 1.
+map<size_t, MultilocusGenotypeStatistics::VarComp> MultilocusGenotypeStatistics::getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const set<size_t>& groups) throw (ZeroDivisionException)
+{
+  map<size_t, MultilocusGenotypeStatistics::VarComp> values;
+  // Base values computation
+  double nbar = 0.;  // holds sum(ni) first, then the mean group size
+  double nc = 0.;    // holds sum(ni^2) first, then nc
+  vector<size_t> ids = getAllelesIdsForGroups(pmgc, locus_position, groups);
+  map<size_t, double> pbar;
+  map<size_t, double> s2;
+  map<size_t, double> hbar;
+  for (size_t i = 0; i < ids.size(); i++)
+  {
+    pbar[ids[i]] = 0.;
+    s2[ids[i]] = 0.;
+    hbar[ids[i]] = 0.;
+  }
+  double r = static_cast<double>(groups.size());
+
+  // First pass: accumulate group sizes and size-weighted frequencies.
+  for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
+  {
+    size_t i = (*set_it);
+    double ni = static_cast<double>(pmgc.getLocusGroupSize(i, locus_position));
+    set<size_t> group_id;
+    group_id.insert(i);
+    map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locus_position, group_id);
+    map<size_t, double> hi = getHeterozygousFrqForGroups(pmgc, locus_position, group_id);
+    nbar += ni;
+    if (r > 1)
+      nc += ni * ni;
+
+    for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
+    {
+      pbar[it->first] += ni * it->second;
+    }
+    for (map<size_t, double>::iterator it = hi.begin(); it != hi.end(); it++)
+    {
+      hbar[it->first] += ni * it->second;
+    }
+  }
+  nbar = nbar / r;
+  if (nbar <= 1)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getVarianceComponents.");
+  // The whole difference is divided by (r - 1). The previous expression
+  // divided only its second term, which is right for r == 2 by coincidence
+  // and wrong for any larger number of groups.
+  if (r > 1)
+    nc = ((r * nbar) - (nc / (r * nbar))) / (r - 1.);
+  for (map<size_t, double>::iterator it = pbar.begin(); it != pbar.end(); it++)
+  {
+    it->second = it->second / (r * nbar);
+  }
+  for (map<size_t, double>::iterator it = hbar.begin(); it != hbar.end(); it++)
+  {
+    it->second = it->second / (r * nbar);
+  }
+
+  // Second pass: size-weighted squared deviations of the group frequencies.
+  for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
+  {
+    size_t i = (*set_it);
+    double ni = static_cast<double>(pmgc.getLocusGroupSize(i, locus_position));
+    set<size_t> group_id;
+    group_id.insert(i);
+    map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locus_position, group_id);
+    for (size_t j = 0; j < ids.size(); j++)
+    {
+      // operator[] inserts a 0 frequency for the alleles absent from this
+      // group, so that they contribute (0 - pbar)^2 to s2.
+      pi[ids[j]];
+    }
+    for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
+    {
+      s2[it->first] += ni * (it->second - pbar[it->first]) * (it->second - pbar[it->first]);
+    }
+  }
+  for (map<size_t, double>::iterator it = s2.begin(); it != s2.end(); it++)
+  {
+    if (r > 1)
+      it->second = it->second / ((r - 1.) * nbar);
+    else
+      it->second = 0.;  // single group: no between-group variance
+  }
+
+  // a, b, c computation
+  for (size_t i = 0; i < ids.size(); i++)
+  {
+    values[ids[i]];
+  }
+
+  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
+  {
+    const double p = pbar[it->first];
+    const double s = s2[it->first];
+    const double h = hbar[it->first];
+    if (r > 1)
+      it->second.a = (nbar / nc) * (s - ((1. / (nbar - 1.)) * ((p * (1. - p)) - (s * (r - 1.) / r) - ((1. / 4.) * h))));
+    else
+      it->second.a = 0.;  // nc is undefined with a single group
+    it->second.b = (nbar / (nbar - 1.)) * ((p * (1. - p)) - (s * (r - 1.) / r) - ((((2. * nbar) - 1.) / (4. * nbar)) * h));
+    it->second.c = h / 2.;
+  }
+  return values;
+}
+
+// Multilocus Fst: sums the variance components a, b and c over every allele of
+// every retained locus and returns A / (A + B + C).
+//
+// A locus is retained when at least two alleles are seen in `groups` and the
+// summed group size at that locus is >= 1.
+// Throws ZeroDivisionException when A + B + C == 0 (including when no locus is
+// retained); exceptions from getVarianceComponents() propagate unchanged.
+double MultilocusGenotypeStatistics::getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, const set<size_t>& groups) throw (Exception)
+{
+  double A = 0.0;
+  double B = 0.0;
+  double C = 0.0;
+  for (size_t i = 0; i < locus_positions.size(); i++)
+  {
+    // `i` only indexes the request; the container must always be queried with
+    // the actual locus position (the pre-checks used to be run on `i` itself).
+    const size_t locus = locus_positions[i];
+
+    // count total number of individuals without missing data
+    size_t ni = 0;
+    for (set<size_t>::const_iterator setIt = groups.begin(); setIt != groups.end(); ++setIt)
+    {
+      ni += pmgc.getLocusGroupSize((*setIt), locus);
+    }
+
+    // reduce computation to loci that are polymorphic for those groups
+    vector<size_t> ids = getAllelesIdsForGroups(pmgc, locus, groups);
+    if (ids.size() >= 2 && ni >= 1)
+    {
+      map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus, groups);
+      for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); ++it)
+      {
+        A += it->second.a;
+        B += it->second.b;
+        C += it->second.c;
+      }
+    }
+  }
+  if ((A + B + C) == 0)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFst.");
+  return A / (A + B + C);
+}
+
+// Multilocus Fis: sums the variance components b and c over every allele of
+// every retained locus and returns 1 - C / (B + C).
+//
+// A locus is retained when at least two alleles are seen in `groups` and the
+// summed group size at that locus is >= 1.
+// Throws ZeroDivisionException when B + C == 0 (including when no locus is
+// retained); exceptions from getVarianceComponents() propagate unchanged.
+double MultilocusGenotypeStatistics::getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, const set<size_t>& groups) throw (Exception)
+{
+  double B = 0.0;
+  double C = 0.0;
+  for (size_t i = 0; i < locus_positions.size(); i++)
+  {
+    // `i` only indexes the request; the container must always be queried with
+    // the actual locus position (the pre-checks used to be run on `i` itself).
+    const size_t locus = locus_positions[i];
+
+    // count total number of individuals without missing data
+    size_t ni = 0;
+    for (set<size_t>::const_iterator setIt = groups.begin(); setIt != groups.end(); ++setIt)
+    {
+      ni += pmgc.getLocusGroupSize((*setIt), locus);
+    }
+
+    // reduce computation to loci that are polymorphic for those groups
+    vector<size_t> ids = getAllelesIdsForGroups(pmgc, locus, groups);
+    if (ids.size() >= 2 && ni >= 1)
+    {
+      map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus, groups);
+      for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); ++it)
+      {
+        B += it->second.b;
+        C += it->second.c;
+      }
+    }
+  }
+  if ((B + C) == 0)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFis.");
+  return 1.0 - C / (B + C);
+}
+
+// Multilocus Fst with a permutation test.
+//
+// The statistic is computed on the sub-container restricted to `groups`, then
+// recomputed on `nb_perm` containers produced by permutMultiG().
+// Percent_sup / Percent_inf hold the fraction (between 0 and 1) of permuted
+// values strictly above / strictly below the observed statistic; both are 0
+// when nb_perm <= 0.
+MultilocusGenotypeStatistics::PermResults MultilocusGenotypeStatistics::getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, set<size_t> groups, int nb_perm) throw (Exception)
+{
+  // work on a PolymorphismMultiGContainer holding only the requested groups
+  PolymorphismMultiGContainer restricted = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups);
+
+  PermResults results;
+  results.Statistic = getWCMultilocusFst(restricted, locus_positions, groups);
+  results.Percent_sup = 0.0;
+  results.Percent_inf = 0.0;
+
+  int above = 0;
+  int below = 0;
+  for (int perm = 0; perm < nb_perm; ++perm)
+  {
+    PolymorphismMultiGContainer shuffled = PolymorphismMultiGContainerTools::permutMultiG(restricted);
+    const double permutedFst = getWCMultilocusFst(shuffled, locus_positions, groups);
+    if (permutedFst > results.Statistic)
+      ++above;
+    else if (permutedFst < results.Statistic)
+      ++below;
+  }
+
+  if (nb_perm > 0)
+  {
+    results.Percent_sup = static_cast<double>(above) / static_cast<double>(nb_perm);
+    results.Percent_inf = static_cast<double>(below) / static_cast<double>(nb_perm);
+  }
+  return results;
+}
+
+// Multilocus Fis with a permutation test.
+//
+// The statistic is computed on the sub-container restricted to `groups`, then
+// recomputed on `nb_perm` containers produced by permutIntraGroupAlleles().
+// Percent_sup / Percent_inf hold the fraction (between 0 and 1) of permuted
+// values strictly above / strictly below the observed statistic; both are 0
+// when nb_perm <= 0.
+MultilocusGenotypeStatistics::PermResults MultilocusGenotypeStatistics::getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, set<size_t> groups, int nb_perm) throw (Exception)
+{
+  // work on a PolymorphismMultiGContainer holding only the requested groups
+  PolymorphismMultiGContainer restricted = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups);
+
+  PermResults results;
+  results.Statistic = getWCMultilocusFis(restricted, locus_positions, groups);
+  results.Percent_sup = 0.0;
+  results.Percent_inf = 0.0;
+
+  int above = 0;
+  int below = 0;
+  for (int perm = 0; perm < nb_perm; ++perm)
+  {
+    PolymorphismMultiGContainer shuffled = PolymorphismMultiGContainerTools::permutIntraGroupAlleles(restricted, groups);
+    const double permutedFis = getWCMultilocusFis(shuffled, locus_positions, groups);
+    if (permutedFis > results.Statistic)
+      ++above;
+    else if (permutedFis < results.Statistic)
+      ++below;
+  }
+
+  if (nb_perm > 0)
+  {
+    results.Percent_sup = static_cast<double>(above) / static_cast<double>(nb_perm);
+    results.Percent_inf = static_cast<double>(below) / static_cast<double>(nb_perm);
+  }
+  return results;
+}
+
+// Multilocus Fst with per-allele weighting by (1 - allele frequency).
+//
+// For every locus showing at least two alleles in `groups`, each allele whose
+// a + b + c is non-zero contributes (1 - P) * a / (a + b + c), where P is the
+// allele frequency over `groups`. The sum is divided by the accumulated
+// (number of contributing alleles - 1) over the retained loci.
+// Throws ZeroDivisionException when that divisor is 0.
+// NOTE(review): a retained locus where no allele contributes adds -1 to the
+// divisor; confirm whether that is intended.
+double MultilocusGenotypeStatistics::getRHMultilocusFst(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, const set<size_t>& groups) throw (Exception)
+{
+  double RH = 0.0;
+  int total_alleles = 0;
+
+  for (size_t i = 0; i < locus_positions.size(); i++)
+  {
+    // `i` only indexes the request; the polymorphism check must look at the
+    // actual locus position (it used to be run on `i` itself).
+    const size_t locus = locus_positions[i];
+
+    // reduce computation to loci that are polymorphic for those groups
+    vector<size_t> ids = getAllelesIdsForGroups(pmgc, locus, groups);
+    if (ids.size() < 2)
+      continue;
+
+    int nb_alleles = 0;
+    // mean allelic frequencies
+    map<size_t, double> P = MultilocusGenotypeStatistics::getAllelesFrqForGroups(pmgc, locus, groups);
+    // variance components from W&C
+    map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus, groups);
+    for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); ++it)
+    {
+      const double Au = it->second.a;
+      const double Bu = it->second.b;
+      const double Cu = it->second.c;
+      if ((Au + Bu + Cu) != 0)
+      {
+        const double Pu = P[it->first]; // it->first is the allele number
+        RH += (1 - Pu) * Au / (Au + Bu + Cu);
+        nb_alleles++;
+      }
+    }
+    total_alleles += (nb_alleles - 1);
+  }
+  if (total_alleles == 0)
+    throw ZeroDivisionException("MultilocusGenotypeStatistics::getRHMultilocusFst.");
+  return RH / double(total_alleles);
+}
+
+// Pairwise distance matrix between the groups listed in `groups`.
+//
+// `distance_methode` selects the measure:
+//   "nei72", "nei78" : getDnei72() / getDnei78() between the two groups
+//   "WC"             : getWCMultilocusFst() on the pair
+//   "RH"             : getRHMultilocusFst() on the pair
+//   "Nm"             : 0.25 * (1 - Fst) / Fst, NAN when Fst == 0
+//   "D"              : -log(1 - Fst), NAN when Fst == 1
+//   "Rousset"        : Fst / (1 - Fst), NAN when Fst == 1
+// where Fst is getWCMultilocusFst() on the pair. Any other value leaves every
+// off-diagonal distance at 0.
+//
+// Rows and columns are indexed by the rank of the group id inside `groups`.
+// NOTE(review): the matrix is built from pmgc.getAllGroupsNames(), so its
+// labels only line up with those ranks if `groups` covers the container's
+// groups in the same order - confirm against callers.
+std::auto_ptr<DistanceMatrix> MultilocusGenotypeStatistics::getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, vector<size_t> locus_positions, const set<size_t>& groups, string distance_methode) throw (Exception)
+{
+  vector<string> names = pmgc.getAllGroupsNames();
+  vector<size_t> grp_ids_vect(groups.begin(), groups.end());
+  const size_t nbGroups = grp_ids_vect.size();
+
+  auto_ptr<DistanceMatrix> dist(new DistanceMatrix(names));
+  for (size_t i = 0; i < nbGroups; i++)
+  {
+    (*dist)(i, i) = 0;
+  }
+
+  // The four measures below are all derived from the W&C multilocus Fst.
+  const bool fstBased = (distance_methode == "WC" || distance_methode == "Nm"
+                         || distance_methode == "D" || distance_methode == "Rousset");
+
+  // `j + 1 < nbGroups` instead of `j < nbGroups - 1`: the latter wraps around
+  // to a huge unsigned value when `groups` is empty.
+  for (size_t j = 0; j + 1 < nbGroups; j++)
+  {
+    for (size_t k = j + 1; k < nbGroups; k++)
+    {
+      double distance = 0;
+      if (distance_methode == "nei72")
+        distance = MultilocusGenotypeStatistics::getDnei72(pmgc, locus_positions, grp_ids_vect[j], grp_ids_vect[k]);
+      else if (distance_methode == "nei78")
+        distance = MultilocusGenotypeStatistics::getDnei78(pmgc, locus_positions, grp_ids_vect[j], grp_ids_vect[k]);
+      else if (distance_methode == "RH") // multilocus Fst with Robertson & Hill weighting
+      {
+        set<size_t> pairwise_grp;
+        pairwise_grp.insert(grp_ids_vect[j]);
+        pairwise_grp.insert(grp_ids_vect[k]);
+        distance = MultilocusGenotypeStatistics::getRHMultilocusFst(pmgc, locus_positions, pairwise_grp);
+      }
+      else if (fstBased)
+      {
+        set<size_t> pairwise_grp;
+        pairwise_grp.insert(grp_ids_vect[j]);
+        pairwise_grp.insert(grp_ids_vect[k]);
+        // multilocus Fst according to W&C
+        const double fst = MultilocusGenotypeStatistics::getWCMultilocusFst(pmgc, locus_positions, pairwise_grp);
+
+        if (distance_methode == "WC")
+          distance = fst;
+        else if (distance_methode == "Nm") // Nm derived from Fst under the island model, Fst = 1/(1+4Nm)
+          distance = (fst != 0) ? 0.25 * (1 - fst) / fst : NAN;
+        else if (distance_methode == "D") // D=-ln(1-Fst) of Reynolds, Weir and Cockerham, 1983
+          distance = (fst != 1) ? -log(1 - fst) : NAN;
+        else // "Rousset": Fst/(1-Fst). Rousset F. 1997
+          distance = (fst != 1) ? fst / (1 - fst) : NAN;
+      }
+
+      (*dist)(k, j) = distance;
+      (*dist)(j, k) = distance;
+    } // for k
+  } // for j
+
+  return dist;
+}
+
diff --git a/src/Bpp/PopGen/MultilocusGenotypeStatistics.h b/src/Bpp/PopGen/MultilocusGenotypeStatistics.h
new file mode 100644
index 0000000..deb65db
--- /dev/null
+++ b/src/Bpp/PopGen/MultilocusGenotypeStatistics.h
@@ -0,0 +1,294 @@
+//
+// File MultilocusGenotypeStatistics.h
+// Authors : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : Wednesday August 04 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _MULTILOCUSGENOTYPESTATISTICS_H_
+#define _MULTILOCUSGENOTYPESTATISTICS_H_
+
+// From STL
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+#include <memory>
+
+#include <Bpp/Exceptions.h>
+
+// From SeqLib
+#include <Bpp/Seq/DistanceMatrix.h>
+
+// From popgenlib
+#include "PolymorphismMultiGContainer.h"
+#include "MultilocusGenotype.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief The MultilocusGenotypeStatistics class
+ *
+ * This class is a set of static methods operating on a PolymorphismMultiGContainer.
+ *
+ * @author Sylvain Gaillard
+ */
+class MultilocusGenotypeStatistics
+{
+public:
+ struct VarComp
+ {
+ double a;
+ double b;
+ double c;
+ };
+
+ struct Fstats
+ {
+ double Fit;
+ double Fst;
+ double Fis;
+ };
+
+ struct PermResults
+ {
+ double Statistic;
+ double Percent_sup;
+ double Percent_inf;
+ };
+
+ /**
+ * @brief Get the alleles' id at one locus for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static std::vector<size_t> getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Count the number of alleles (gametes) at a locus for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static size_t countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get a map of allele count for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static std::map<size_t, size_t> getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the alleles frequencies at one locus for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static std::map<size_t, double> getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Count the number of non-missing data at a given locus for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static size_t countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ */
+ static std::map<size_t, size_t> countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the heterozygous frequencies for each allele at a locus in a set of groups.
+ *
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static std::map<size_t, double> getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the observed heterozygosity for one locus.
+ *
+ * This is the mean value of the getHeterozygousFrqForGroups map.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static double getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the expected heterozygosity for one locus.
+ *
+ * Nei 1977
+ * @f[
+ * H_{exp}=1-\sum_{i=1}^{n}x_i^2
+ * @f]
+ * where @f$x_i@f$ is the frequency of the i<sup>th</sup> allele and @f$n@f$ the number of alleles.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static double getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the expected non biased heterozygosity for one locus.
+ *
+ * Nei 1978
+ * @f[
+ * H_{nb}=\frac{2n}{2n-1}\left(1-\sum_{i=1}^{n}x_i^2\right)=\frac{2n}{2n-1}H_{exp}
+ * @f]
+ * where @f$x_i@f$ is the frequency of the i<sup>th</sup> allele and @f$n@f$ the number of alleles.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static double getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Nei distance between two groups at one locus.
+ *
+ * Nei 1972
+ * @f[
+ * \hat{D}_1=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)}
+ * {\sqrt{\displaystyle\sum_{i=1}^{n}x_i^2\times \displaystyle\sum_{i=1}^{n}y_i^2}}\right]
+ * @f]
+ * where @f$x_i@f$ and @f$y_i@f$ are respectively the i<sup>th</sup> allele's frequency of the first and second group
+ * and @f$n@f$ the total number of alleles of both groups.
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static double getDnei72(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, size_t grp1, size_t grp2) throw (Exception);
+
+ /**
+ * @brief Compute the Nei unbiased distance between two groups at a given number of loci.
+ *
+ * Nei 1978
+ * @f[
+ * \hat{D}=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)}
+ * {\sqrt{\frac{2n_XJ_X-1}{2n_X-1}\times\frac{2n_YJ_Y-1}{2n_Y-1}}}
+ * \right]
+ * @f]
+ * where @f$x_i@f$ and @f$y_i@f$ are respectively the i<sup>th</sup> allele's frequency of the first and second group,
+ * @f$n@f$ the total number of alleles of both groups, @f$n_X@f$ and @f$n_Y@f$ the number of alleles in the first and second group
+ * and
+ * @f[
+ * J_X=\sum_{i=1}^{n}x_i^2
+ * \qquad\textrm{and}\qquad
+ * J_Y=\sum_{i=1}^{n}y_i^2
+ * @f]
+ * @throw IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
+ * @throw ZeroDivisionException if the number of considered alleles = 0.
+ */
+ static double getDnei78(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, size_t grp1, size_t grp2) throw (Exception);
+
+ /**
+ * @brief Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
+ */
+ static std::map<size_t, Fstats> getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
+ */
+ static std::map<size_t, double> getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham @f$\theta@f$ on a set of groups for each allele of a given locus.
+ */
+ static std::map<size_t, double> getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
+ */
+ static std::map<size_t, double> getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Get the variance components a, b and c (Weir and Cockerham, 1983).
+ */
+ static std::map<size_t, VarComp> getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups) throw (ZeroDivisionException);
+
+ /**
+ * @brief Compute the Weir and Cockerham @f$\theta_{wc}@f$ on a set of groups for a given set of loci.
+ * The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.
+ */
+ static double getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci.
+ * The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.
+ */
+ static double getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham @f$\theta_{wc}@f$ on a set of groups for a given set of loci and make a permutation test.
+ * Multilocus @f$\theta@f$ is calculated as in getWCMultilocusFst on the original data set and on nb_perm data sets obtained after
+ * a permutation of individuals between the different groups.
+ * Return values are theta, the fraction of permuted values > theta and the fraction of permuted values < theta.
+ */
+ static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, std::set<size_t> groups, int nb_perm) throw (Exception);
+
+ /**
+ * @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation test.
+ * Multilocus Fis is calculated as in getWCMultilocusFis on the original data set and on nb_perm data sets obtained after
+ * a permutation of alleles between individuals of each group.
+ * Return values are Fis, the fraction of permuted values > Fis and the fraction of permuted values < Fis.
+ */
+ static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, std::set<size_t> groups, int nb_perm) throw (Exception);
+
+
+ /**
+ * @brief Compute the @f$\theta_{RH}@f$ on a set of groups for a given set of loci.
+ * The variance components for each allele are calculated and then combined over loci using RH weighting with alleles frequency.
+ */
+ static double getRHMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups) throw (Exception);
+
+ /**
+ * @brief Compute pairwise distances on a set of groups for a given set of loci.
+ * distance_methode is one of "nei72", "nei78", "WC" (Fst W&C), "RH" (Fst Robertson & Hill), "Nm",
+ * "D" (D=-ln(1-Fst) of Reynolds et al. 1983) or "Rousset" (Rousset 1997 Fst/(1-Fst)).
+ */
+ static std::auto_ptr<DistanceMatrix> getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups, std::string distance_methode) throw (Exception);
+};
+} // end of namespace bpp;
+
+#endif // _MULTILOCUSGENOTYPESTATISTICS_H_
+
diff --git a/src/Bpp/PopGen/ODataSet.h b/src/Bpp/PopGen/ODataSet.h
new file mode 100644
index 0000000..61ce3a1
--- /dev/null
+++ b/src/Bpp/PopGen/ODataSet.h
@@ -0,0 +1,75 @@
+//
+// File ODataSet.h
+// Author : Sylvain Gaillard
+// Last modification : Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _ODATASET_H_
+#define _ODATASET_H_
+
+#include "IODataSet.h"
+
+#include <Bpp/Exceptions.h>
+
+namespace bpp
+{
+/**
+ * @brief The ODataSet interface: pure virtual writers for a DataSet.
+ *
+ * @author Sylvain Gaillard
+ */
+class ODataSet :
+ public virtual IODataSet
+{
+public:
+ // Virtual destructor, so implementations can be destroyed through an ODataSet pointer.
+ virtual ~ODataSet() {}
+
+public:
+ /**
+ * @brief Write a DataSet to an output stream.
+ */
+ virtual void write(std::ostream& os, const DataSet& data_set) const throw (Exception) = 0;
+
+ /**
+ * @brief Write a DataSet to the text file at the given path (see the overwrite flag).
+ */
+ virtual void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception) = 0;
+};
+} // end of namespace bpp;
+
+#endif // _ODATASET_H_
+
diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp b/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp
new file mode 100644
index 0000000..d7fdd0e
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismMultiGContainer.cpp
@@ -0,0 +1,323 @@
+//
+// File PolymorphismMultiGContainer.cpp
+// Author : Sylvain Gaillard
+// Khalid Belkhir
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "PolymorphismMultiGContainer.h"
+
+using namespace bpp;
+using namespace std;
+
+// ** Constructors : **********************************************************/
+
+/*
+ * Default constructor: builds an empty container (no genotype, no group id
+ * and no group name).
+ */
+PolymorphismMultiGContainer::PolymorphismMultiGContainer() :
+  multilocusGenotypes_(),
+  groups_(),
+  groups_names_()
+{}
+
+/*
+ * Copy constructor: performs a deep copy of pmgc.
+ *
+ * Every MultilocusGenotype is cloned, so the new container owns its own
+ * genotypes. Group ids and group names are copied as a whole from the
+ * source members: this keeps the names of groups that currently have no
+ * individual, and it cannot throw GroupNotFoundException halfway through
+ * the construction (which would leak the genotypes already cloned, since
+ * the destructor does not run for a partially constructed object).
+ */
+PolymorphismMultiGContainer::PolymorphismMultiGContainer(const PolymorphismMultiGContainer& pmgc) :
+  multilocusGenotypes_(),
+  groups_(pmgc.groups_),
+  groups_names_(pmgc.groups_names_)
+{
+  multilocusGenotypes_.reserve(pmgc.multilocusGenotypes_.size());
+  try
+  {
+    for (size_t i = 0; i < pmgc.multilocusGenotypes_.size(); i++)
+    {
+      multilocusGenotypes_.push_back(new MultilocusGenotype(*pmgc.multilocusGenotypes_[i]));
+    }
+  }
+  catch (...)
+  {
+    // Release what was cloned so far before propagating the failure.
+    for (size_t i = 0; i < multilocusGenotypes_.size(); i++)
+    {
+      delete multilocusGenotypes_[i];
+    }
+    throw;
+  }
+}
+
+// ** Destructor : ************************************************************/
+
+/*
+ * Destructor: the container owns its genotypes, so they are released
+ * through clear().
+ */
+PolymorphismMultiGContainer::~PolymorphismMultiGContainer()
+{
+  this->clear();
+}
+
+// ** Other methods : *********************************************************/
+
+/*
+ * Assignment operator: replaces the content of this container by a deep
+ * copy of pmgc and returns *this.
+ *
+ * Self-assignment is a no-op: without the guard, clear() would destroy the
+ * very genotypes that are about to be copied and the container would end
+ * up empty.
+ *
+ * Group names are copied as a whole from the source map, so names of groups
+ * without any individual are kept and no GroupNotFoundException can leave
+ * the object half assigned.
+ */
+PolymorphismMultiGContainer& PolymorphismMultiGContainer::operator=(const PolymorphismMultiGContainer& pmgc)
+{
+  if (this == &pmgc)
+    return *this;
+
+  clear();
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    multilocusGenotypes_.push_back(new MultilocusGenotype(*pmgc.getMultilocusGenotype(i)));
+    groups_.push_back(pmgc.getGroupId(i));
+  }
+  groups_names_ = pmgc.groups_names_;
+
+  return *this;
+}
+
+/******************************************************************************/
+
+/*
+ * Appends a copy of mg to the container and records its group id.
+ * A group seen for the first time is registered with an empty name.
+ */
+void PolymorphismMultiGContainer::addMultilocusGenotype(const MultilocusGenotype& mg, size_t group)
+{
+  MultilocusGenotype* copy = new MultilocusGenotype(mg);
+  multilocusGenotypes_.push_back(copy);
+  groups_.push_back(group);
+  // insert() leaves an already registered name untouched; an unknown group
+  // gets an empty name.
+  groups_names_.insert(map<size_t, string>::value_type(group, string()));
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the genotype stored at the given position (the container keeps
+ * ownership of it).
+ * Throws IndexOutOfBoundsException when position is not below size().
+ */
+const MultilocusGenotype* PolymorphismMultiGContainer::getMultilocusGenotype(size_t position) const throw (IndexOutOfBoundsException)
+{
+  const size_t count = size();
+  if (position < count)
+    return multilocusGenotypes_[position];
+
+  // NOTE: count - 1 is unsigned arithmetic; it wraps around when the container is empty.
+  throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getMultilocusGenotype: position out of bounds.", position, 0, count - 1);
+}
+
+/******************************************************************************/
+
+/*
+ * Takes the genotype at the given position out of the container, together
+ * with its group id, and returns it. The genotype is not deleted here and
+ * is no longer tracked by the container. Registered group names are left
+ * untouched.
+ * Throws IndexOutOfBoundsException when position is not below size().
+ */
+MultilocusGenotype* PolymorphismMultiGContainer::removeMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException)
+{
+  const size_t count = size();
+  if (position >= count)
+  {
+    // NOTE: count - 1 is unsigned arithmetic; it wraps around when the container is empty.
+    throw IndexOutOfBoundsException("PolymorphismMultiGContainer::removeMultilocusGenotype: position out of bounds.", position, 0, count - 1);
+  }
+
+  MultilocusGenotype* removed = multilocusGenotypes_[position];
+  multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position);
+  groups_.erase(groups_.begin() + position);
+  return removed;
+}
+
+/******************************************************************************/
+
+/*
+ * Destroys the genotype at the given position and drops it, together with
+ * its group id, from the container. Registered group names are left
+ * untouched.
+ * Throws IndexOutOfBoundsException when position is not below size().
+ */
+void PolymorphismMultiGContainer::deleteMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException)
+{
+  const size_t count = size();
+  if (position >= count)
+  {
+    // NOTE: count - 1 is unsigned arithmetic; it wraps around when the container is empty.
+    throw IndexOutOfBoundsException("PolymorphismMultiGContainer::deleteMultilocusGenotype: position out of bounds.", position, 0, count - 1);
+  }
+
+  MultilocusGenotype* doomed = multilocusGenotypes_[position];
+  delete doomed;
+  multilocusGenotypes_.erase(multilocusGenotypes_.begin() + position);
+  groups_.erase(groups_.begin() + position);
+}
+
+/******************************************************************************/
+
+/*
+ * Tells whether all stored genotypes have the same size.
+ * An empty container is reported as aligned.
+ */
+bool PolymorphismMultiGContainer::isAligned() const
+{
+  const size_t count = size();
+  if (count == 0)
+    return true;
+
+  // Every genotype is compared with the first one.
+  const size_t reference = multilocusGenotypes_[0]->size();
+  for (size_t i = 1; i < count; ++i)
+  {
+    if (multilocusGenotypes_[i]->size() != reference)
+      return false;
+  }
+  return true;
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the size shared by all genotypes, or 0 for an empty container.
+ * Throws Exception when the genotypes do not all have the same size.
+ */
+size_t PolymorphismMultiGContainer::getNumberOfLoci() const throw (Exception)
+{
+  if (!isAligned())
+    throw Exception("MultilocusGenotypes are not aligned.");
+
+  return (size() < 1) ? 0 : multilocusGenotypes_[0]->size();
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the group id recorded for the genotype at the given position.
+ * Throws IndexOutOfBoundsException when position is not below size().
+ */
+size_t PolymorphismMultiGContainer::getGroupId(size_t position) const throw (IndexOutOfBoundsException)
+{
+  const size_t count = size();
+  if (position < count)
+    return groups_[position];
+
+  // NOTE: count - 1 is unsigned arithmetic; it wraps around when the container is empty.
+  throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getGroupId: position out of bounds.", position, 0, count - 1);
+}
+
+/******************************************************************************/
+
+/*
+ * Changes the group id recorded for the genotype at the given position.
+ * The group names map is not modified: no name is registered here for
+ * group_id.
+ * Throws IndexOutOfBoundsException when position is not below size().
+ */
+void PolymorphismMultiGContainer::setGroupId(size_t position, size_t group_id) throw (IndexOutOfBoundsException)
+{
+  const size_t count = size();
+  if (position >= count)
+  {
+    // NOTE: count - 1 is unsigned arithmetic; it wraps around when the container is empty.
+    throw IndexOutOfBoundsException("PolymorphismMultiGContainer::setGroupId: position out of bounds.", position, 0, count - 1);
+  }
+  groups_[position] = group_id;
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the distinct group ids recorded for the stored genotypes.
+ * groups_ holds one id per stored genotype (every mutator of this class
+ * updates both vectors together), so the whole vector is scanned.
+ */
+std::set<size_t> PolymorphismMultiGContainer::getAllGroupsIds() const
+{
+  return set<size_t>(groups_.begin(), groups_.end());
+}
+
+/******************************************************************************/
+
+/*
+ * Returns one label per registered group, in increasing id order: the
+ * group name, or the id converted to text when the name is empty.
+ */
+std::vector<std::string> PolymorphismMultiGContainer::getAllGroupsNames() const
+{
+  vector<string> labels;
+  for (map<size_t, string>::const_iterator entry = groups_names_.begin(); entry != groups_names_.end(); ++entry)
+  {
+    const string& name = entry->second;
+    labels.push_back(name.empty() ? TextTools::toString(entry->first) : name);
+  }
+  return labels;
+}
+
+/******************************************************************************/
+
+/*
+ * Tells whether at least one stored genotype belongs to the given group.
+ * Only the per-genotype group ids are examined, not the names map.
+ */
+bool PolymorphismMultiGContainer::groupExists(size_t group) const
+{
+  const size_t count = size();
+  bool found = false;
+  for (size_t i = 0; !found && i < count; ++i)
+  {
+    found = (groups_[i] == group);
+  }
+  return found;
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the number of distinct group ids among the stored genotypes.
+ */
+size_t PolymorphismMultiGContainer::getNumberOfGroups() const
+{
+  const set<size_t> ids = getAllGroupsIds();
+  return ids.size();
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the number of stored genotypes whose group id equals group.
+ */
+size_t PolymorphismMultiGContainer::getGroupSize(size_t group) const
+{
+  const size_t count = size();
+  size_t members = 0;
+  for (size_t i = 0; i < count; ++i)
+  {
+    members += (groups_[i] == group) ? 1 : 0;
+  }
+  return members;
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the name registered for group_id (possibly an empty string).
+ * Throws GroupNotFoundException when no name is registered for this id.
+ */
+std::string PolymorphismMultiGContainer::getGroupName(size_t group_id) const throw (GroupNotFoundException)
+{
+  const map<size_t, string>::const_iterator entry = groups_names_.find(group_id);
+  if (entry == groups_names_.end())
+    throw GroupNotFoundException("PolymorphismMultiGContainer::getGroupName: group not found.", group_id);
+
+  return entry->second;
+}
+
+/******************************************************************************/
+
+/*
+ * Replaces the name registered for an already known group.
+ *
+ * group_id: id of the group to rename; it must already have an entry in
+ *           the names map (see addMultilocusGenotype and addGroupName).
+ * name:     the new name.
+ *
+ * Throws GroupNotFoundException when group_id has no entry. The message
+ * now names this method: it used to mention getGroupName, which pointed
+ * the reader of the error at the wrong call.
+ */
+void PolymorphismMultiGContainer::setGroupName(size_t group_id, std::string name) throw (GroupNotFoundException)
+{
+  map<size_t, string>::iterator it = groups_names_.find(group_id);
+  if (it == groups_names_.end())
+    throw GroupNotFoundException("PolymorphismMultiGContainer::setGroupName: group not found.", group_id);
+
+  it->second = name;
+}
+
+/******************************************************************************/
+
+/*
+ * Registers name for group_id, replacing any name already registered for
+ * that id. The group does not need to contain any genotype.
+ */
+void PolymorphismMultiGContainer::addGroupName(size_t group_id, std::string name)
+{
+  string& slot = groups_names_[group_id];
+  slot = name;
+}
+
+/******************************************************************************/
+
+/*
+ * Counts the genotypes of a group that are not missing at a given locus.
+ *
+ * group:          id of the group to count in.
+ * locus_position: position of the locus inside each MultilocusGenotype.
+ *
+ * Returns the number of stored genotypes whose group id equals group and
+ * for which isMonolocusGenotypeMissing(locus_position) is false.
+ *
+ * An IndexOutOfBoundsException raised by a genotype is rethrown with a
+ * message naming this method and carrying the same bad index and bounds.
+ * The message used to mention getGroupSize, which takes no locus at all.
+ */
+size_t PolymorphismMultiGContainer::getLocusGroupSize(size_t group, size_t locus_position) const
+{
+  size_t counter = 0;
+  for (size_t i = 0; i < size(); i++)
+  {
+    try
+    {
+      if (groups_[i] == group && !multilocusGenotypes_[i]->isMonolocusGenotypeMissing(locus_position))
+        counter++;
+    }
+    catch (IndexOutOfBoundsException& ioobe)
+    {
+      throw IndexOutOfBoundsException("PolymorphismMultiGContainer::getLocusGroupSize: locus_position out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
+    }
+  }
+  return counter;
+}
+
+/******************************************************************************/
+
+/*
+ * Returns the number of genotypes currently stored.
+ */
+size_t PolymorphismMultiGContainer::size() const
+{
+  const size_t count = multilocusGenotypes_.size();
+  return count;
+}
+
+/******************************************************************************/
+
+/*
+ * Empties the container: every stored genotype is deleted, then the
+ * genotype list, the group ids and the group names are all cleared.
+ */
+void PolymorphismMultiGContainer::clear()
+{
+  typedef vector<MultilocusGenotype*>::iterator GenotypeIt;
+  for (GenotypeIt it = multilocusGenotypes_.begin(); it != multilocusGenotypes_.end(); ++it)
+  {
+    delete *it;
+  }
+  multilocusGenotypes_.clear();
+  groups_.clear();
+  groups_names_.clear();
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainer.h b/src/Bpp/PopGen/PolymorphismMultiGContainer.h
new file mode 100644
index 0000000..8e71ad8
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismMultiGContainer.h
@@ -0,0 +1,206 @@
+//
+// File PolymorphismMultiGContainer.h
+// Author : Sylvain Gaillard
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _POLYMORPHYSMMULTIGCONTAINER_H_
+#define _POLYMORPHYSMMULTIGCONTAINER_H_
+
+// From Utils
+#include <Bpp/Clonable.h>
+#include <Bpp/Exceptions.h>
+#include <Bpp/Utils/MapTools.h>
+#include <Bpp/Text/TextTools.h>
+
+// From popgenlib
+#include "MultilocusGenotype.h"
+#include "GeneralExceptions.h"
+
+// From STL
+#include <string>
+#include <vector>
+#include <map>
+#include <set>
+
+namespace bpp
+{
+/**
+ * @brief The PolymorphismMultiGContainer class
+ *
+ * This class is a container of MultilocusGenotype.
+ *
+ * Each stored genotype is associated with a group id, and group ids can be
+ * associated with a name. Genotypes are copied when added and are deleted
+ * by the container (see clear() and the destructor).
+ *
+ * @author Sylvain Gaillard
+ */
+class PolymorphismMultiGContainer
+{
+private:
+ // The stored genotypes (allocated by the container).
+ std::vector<MultilocusGenotype*> multilocusGenotypes_;
+ std::vector<size_t> groups_; // group id for each multilocusgenotype
+ // Name registered for each group id (may be an empty string).
+ std::map<size_t, std::string> groups_names_;
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a new PolymorphismMultilocusGenotypeContainer.
+ */
+ PolymorphismMultiGContainer();
+
+ /**
+ * @brief The copy constructor.
+ *
+ * @param pmgc The container to copy; its genotypes are duplicated.
+ */
+ PolymorphismMultiGContainer(const PolymorphismMultiGContainer& pmgc);
+
+ /**
+ * @brief Destroy a PolymorphismMultilocusGenotypeContainer.
+ *
+ * The stored genotypes are deleted.
+ */
+ ~PolymorphismMultiGContainer();
+
+public:
+ /**
+ * @brief The assignment operator=.
+ *
+ * @param pmgc The container to copy; its genotypes are duplicated.
+ * @return A reference to this container.
+ */
+ PolymorphismMultiGContainer& operator=(const PolymorphismMultiGContainer& pmgc);
+
+ /**
+ * @brief Add a MultilocusGenotype to the container.
+ *
+ * A copy of the genotype is stored. If the group has no registered name
+ * yet, it is registered with an empty name.
+ *
+ * @param mg The genotype to copy into the container.
+ * @param group The id of the group the genotype belongs to.
+ */
+ void addMultilocusGenotype(const MultilocusGenotype& mg, size_t group);
+
+ /**
+ * @brief Get a MultilocusGenotype at a position.
+ *
+ * @param position The position of the genotype in the container.
+ * @return A pointer to the stored genotype.
+ * @throw IndexOutOfBoundsException if position exceeds the size of the container.
+ */
+ const MultilocusGenotype* getMultilocusGenotype(size_t position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Remove a MultilocusGenotype.
+ *
+ * The genotype is taken out of the container without being deleted.
+ *
+ * @param position The position of the genotype in the container.
+ * @return A pointer to the removed genotype.
+ * @throw IndexOutOfBoundsException if position exceeds the size of the container.
+ */
+ MultilocusGenotype* removeMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete a MultilocusGenotype.
+ *
+ * The genotype is taken out of the container and deleted.
+ *
+ * @param position The position of the genotype in the container.
+ * @throw IndexOutOfBoundsException if position exceeds the size of the container.
+ */
+ void deleteMultilocusGenotype(size_t position) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Tell if the MultilocusGenotypes are aligned (i.e. same size).
+ *
+ * @return true if all genotypes have the same size, or if the container is empty.
+ */
+ bool isAligned() const;
+
+ /**
+ * @brief Get the number of loci if the MultilocusGenotypes are aligned.
+ *
+ * @return The size of the genotypes, or 0 if the container is empty.
+ * @throw Exception if MultilocusGenotypes are not aligned.
+ */
+ size_t getNumberOfLoci() const throw (Exception);
+
+ /**
+ * @brief Get the Group id of a MultilocusGenotype.
+ *
+ * @param position The position of the genotype in the container.
+ * @throw IndexOutOfBoundsException if position exceeds the size of the container.
+ */
+ size_t getGroupId(size_t position) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set the Group id of a MultilocusGenotype.
+ *
+ * No name is registered for group_id by this method.
+ *
+ * @param position The position of the genotype in the container.
+ * @param group_id The new group id of the genotype.
+ * @throw IndexOutOfBoundsException if position exceeds the size of the container.
+ */
+ void setGroupId(size_t position, size_t group_id) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the groups' ids.
+ *
+ * @return The distinct group ids of the stored genotypes.
+ */
+ std::set<size_t> getAllGroupsIds() const;
+
+ /**
+ * @brief Get the groups names or ids if not available
+ *
+ * @return One entry per registered group: its name, or its id converted
+ * to a string when the name is empty.
+ */
+ std::vector<std::string> getAllGroupsNames() const;
+
+ /**
+ * @brief Tell if a group exists.
+ *
+ * @param group The group id to look for.
+ * @return true if at least one stored genotype belongs to the group.
+ */
+ bool groupExists(size_t group) const;
+
+ /**
+ * @brief Get the number of groups.
+ *
+ * @return The number of distinct group ids among the stored genotypes.
+ */
+ size_t getNumberOfGroups() const;
+
+ /**
+ * @brief Get group size.
+ *
+ * @param group The group id.
+ * @return The number of stored genotypes belonging to the group.
+ */
+ size_t getGroupSize(size_t group) const;
+
+ /**
+ * @brief Get the name registered for a given group id.
+ *
+ * @param group_id The group id.
+ * @return The registered name (which may be an empty string).
+ * @throw GroupNotFoundException if no name is registered for group_id.
+ */
+ std::string getGroupName(size_t group_id) const throw (GroupNotFoundException);
+
+ /**
+ * @brief Set the name for the given group id.
+ *
+ * @param group_id The group id; a name must already be registered for it.
+ * @param name The new name.
+ * @throw GroupNotFoundException if no name is registered for group_id.
+ */
+ void setGroupName(size_t group_id, std::string name) throw (GroupNotFoundException);
+
+ /**
+ * @brief Inserts a name for the given group id.
+ *
+ * Any name previously registered for group_id is replaced.
+ *
+ * @param group_id The group id.
+ * @param name The name to register.
+ */
+ void addGroupName(size_t group_id, std::string name);
+
+ /**
+ * @brief Get the size of a group for a given locus.
+ *
+ * @param group The group id.
+ * @param locus_position The position of the locus in the genotypes.
+ * @return The number of genotypes of the group that are not missing at
+ * this locus.
+ */
+ size_t getLocusGroupSize(size_t group, size_t locus_position) const;
+
+ /**
+ * @brief Get the number of MultilocusGenotype.
+ */
+ size_t size() const;
+
+ /**
+ * @brief Clear the container.
+ *
+ * All stored genotypes are deleted; group ids and group names are removed.
+ */
+ void clear();
+};
+} // end of namespace bpp;
+
+#endif // _POLYMORPHYSMMULTIGCONTAINER_H_
diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp
new file mode 100644
index 0000000..e4fba83
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.cpp
@@ -0,0 +1,380 @@
+//
+// File PolymorphismMultiGContainerTools.cpp
+// Author : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : june 15 2006
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "PolymorphismMultiGContainerTools.h"
+#include <algorithm>
+
+using namespace std;
+using namespace bpp;
+
+/******************************************************************************/
+
+/*
+ * Returns a copy of pmgc in which the group ids have been randomly
+ * reassigned among the individuals: the genotypes keep their position and
+ * the set of ids (with multiplicity) is preserved.
+ * The shuffle is done with std::random_shuffle rather than
+ * RandomTools::getSample.
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMultiG(const PolymorphismMultiGContainer& pmgc)
+{
+  PolymorphismMultiGContainer shuffled(pmgc);
+  const size_t nb_ind = shuffled.size();
+
+  // Collect the current group id of every individual...
+  vector<size_t> group_ids(nb_ind);
+  for (size_t ind = 0; ind < nb_ind; ++ind)
+  {
+    group_ids[ind] = shuffled.getGroupId(ind);
+  }
+
+  // ...shuffle them and hand them back in the new order.
+  std::random_shuffle(group_ids.begin(), group_ids.end());
+  for (size_t ind = 0; ind < nb_ind; ++ind)
+  {
+    shuffled.setGroupId(ind, group_ids[ind]);
+  }
+  return shuffled;
+}
+
+/******************************************************************************/
+
+/*
+ * Builds a new container in which, locus by locus, the monolocus genotypes
+ * of the individuals belonging to one of the given groups are randomly
+ * redistributed among those individuals (all selected groups pooled).
+ * Individuals of the other groups are copied unchanged. Individual order
+ * and group ids are those of pmgc, and group names are copied from pmgc.
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
+{
+  PolymorphismMultiGContainer permuted_pmgc;
+  const size_t loc_num = pmgc.getNumberOfLoci();
+  const size_t nb_ind = pmgc.size();
+
+  // pool[locus] gathers the genotypes of the selected individuals at that locus
+  vector<vector<const MonolocusGenotype*> > pool(loc_num);
+  for (size_t ind = 0; ind < nb_ind; ++ind)
+  {
+    if (groups.find(pmgc.getGroupId(ind)) == groups.end())
+      continue;
+    const MultilocusGenotype* source = pmgc.getMultilocusGenotype(ind);
+    for (size_t locus = 0; locus < loc_num; ++locus)
+    {
+      pool[locus].push_back(&source->getMonolocusGenotype(locus));
+    }
+  }
+
+  // Each locus is shuffled independently
+  for (size_t locus = 0; locus < loc_num; ++locus)
+  {
+    std::random_shuffle(pool[locus].begin(), pool[locus].end());
+  }
+
+  // Rebuild the individuals; 'next' indexes the shuffled pools
+  size_t next = 0;
+  for (size_t ind = 0; ind < nb_ind; ++ind)
+  {
+    const size_t grp = pmgc.getGroupId(ind);
+    if (groups.find(grp) == groups.end())
+    {
+      // not concerned by the permutation: copied as is
+      permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(ind)), grp);
+      continue;
+    }
+
+    MultilocusGenotype tmp_mg(loc_num);
+    for (size_t locus = 0; locus < loc_num; ++locus)
+    {
+      const MonolocusGenotype* picked = pool[locus][next];
+      if (picked != NULL)
+        tmp_mg.setMonolocusGenotype(locus, *picked);
+    }
+    permuted_pmgc.addMultilocusGenotype(tmp_mg, grp);
+    ++next;
+  }
+
+  // Carry the group names over
+  const set<size_t> grp_ids = pmgc.getAllGroupsIds();
+  for (set<size_t>::const_iterator id = grp_ids.begin(); id != grp_ids.end(); ++id)
+  {
+    permuted_pmgc.setGroupName(*id, pmgc.getGroupName(*id));
+  }
+
+  return permuted_pmgc;
+}
+
+/******************************************************************************/
+
+/*
+ * Builds a new container in which, for each of the given groups and locus
+ * by locus, the monolocus genotypes are randomly redistributed among the
+ * individuals of that group only.
+ *
+ * pmgc:   the source container (its genotypes must be aligned, see
+ *         getNumberOfLoci()).
+ * groups: ids of the groups to permute.
+ *
+ * Layout of the result: first the individuals whose group is not in
+ * 'groups', copied unchanged in their original order, then the permuted
+ * individuals of each selected group, in increasing group id order.
+ * Group names are copied from pmgc.
+ *
+ * Fixes over the previous implementation:
+ *  - the genotype pool is now local to each group; it used to accumulate
+ *    over groups, so from the second group on genotypes were drawn from a
+ *    pool still containing the previous groups' genotypes;
+ *  - individuals outside 'groups' are copied exactly once; they used to be
+ *    copied once per selected group (and never when 'groups' was empty);
+ *  - each rebuilt individual starts from a fresh MultilocusGenotype instead
+ *    of reusing the previous individual's object.
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
+{
+  PolymorphismMultiGContainer permuted_pmgc;
+  const size_t loc_num = pmgc.getNumberOfLoci();
+
+  // Individuals that are not concerned by the permutation: insert as is
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    const size_t indiv_grp = pmgc.getGroupId(i);
+    if (groups.find(indiv_grp) == groups.end())
+      permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp);
+  }
+
+  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group
+  {
+    // Get all the MonolocusGenotypes of group g to permut (pool local to g)
+    vector<vector<const MonolocusGenotype*> > mono_gens(loc_num);
+    size_t nb_ind_in_group = 0;
+    for (size_t i = 0; i < pmgc.size(); i++)
+    {
+      if (pmgc.getGroupId(i) != *g)
+        continue;
+      nb_ind_in_group++;
+      const MultilocusGenotype* mg = pmgc.getMultilocusGenotype(i);
+      for (size_t j = 0; j < loc_num; j++)
+      {
+        mono_gens[j].push_back(&mg->getMonolocusGenotype(j));
+      }
+    }
+
+    if (nb_ind_in_group == 0)
+      continue;
+
+    // Permut the MonolocusGenotypes, each locus independently
+    for (size_t j = 0; j < loc_num; j++)
+    {
+      std::random_shuffle(mono_gens[j].begin(), mono_gens[j].end());
+    }
+
+    // Build the new multilocus genotypes of the group
+    for (size_t k = 0; k < nb_ind_in_group; k++)
+    {
+      MultilocusGenotype tmp_mg(loc_num);
+      for (size_t j = 0; j < loc_num; j++)
+      {
+        if (mono_gens[j][k] != NULL)
+          tmp_mg.setMonolocusGenotype(j, *(mono_gens[j][k]));
+      }
+      permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g));
+    }
+  }
+
+  // update groups names
+  set<size_t> grp_ids = pmgc.getAllGroupsIds();
+  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
+  {
+    size_t id = *it;
+    string name = pmgc.getGroupName(id);
+    permuted_pmgc.setGroupName(id, name);
+  }
+
+  return permuted_pmgc;
+}
+
+/******************************************************************************/
+
+/*
+ * Builds a new container in which, locus by locus, the alleles of the
+ * individuals belonging to one of the given groups are randomly
+ * redistributed among those individuals (all selected groups pooled).
+ *
+ * pmgc:   the source container (its genotypes must be aligned, see
+ *         getNumberOfLoci()).
+ * groups: ids of the groups whose alleles are permuted.
+ *
+ * Individuals of the other groups are copied unchanged. Individual order
+ * and group ids are those of pmgc, and group names are copied from pmgc.
+ * A locus that is missing for an individual stays missing, and each
+ * individual keeps the number of alleles (1 or 2) it had at each locus.
+ * NOTE: genotypes with more than two alleles contribute to the pool but are
+ * not rebuilt (the locus is left unset), as before.
+ *
+ * Fix: the two alleles of a bi-allelic genotype are now drawn in two
+ * separate statements. They used to be drawn as
+ * BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++]), which
+ * modifies k[j] twice without sequencing between the two function
+ * arguments: undefined behaviour.
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
+{
+  PolymorphismMultiGContainer permuted_pmgc;
+  const size_t loc_num = pmgc.getNumberOfLoci();
+  vector<vector<size_t> > alleles(loc_num);
+
+  // Get all the alleles to permut
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    if (groups.find(pmgc.getGroupId(i)) == groups.end())
+      continue;
+    const MultilocusGenotype* mg = pmgc.getMultilocusGenotype(i);
+    for (size_t j = 0; j < loc_num; j++)
+    {
+      if (mg->isMonolocusGenotypeMissing(j))
+        continue;
+      const MonolocusGenotype& geno = mg->getMonolocusGenotype(j);
+      const size_t nb_alls = geno.getAlleleIndex().size();
+      for (size_t a = 0; a < nb_alls; a++)
+      {
+        alleles[j].push_back(geno.getAlleleIndex()[a]);
+      }
+    }
+  }
+
+  // Permut the alleles, each locus independently
+  for (size_t j = 0; j < loc_num; j++)
+  {
+    std::random_shuffle(alleles[j].begin(), alleles[j].end());
+  }
+
+  // Build the new PolymorphismMultiGContainer; k[j] is the next unused
+  // allele of the shuffled pool of locus j
+  vector<size_t> k(loc_num, 0);
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    const size_t indiv_grp = pmgc.getGroupId(i);
+    const MultilocusGenotype* mg = pmgc.getMultilocusGenotype(i);
+    if (groups.find(indiv_grp) == groups.end())
+    {
+      // not concerned by the permutation: insert as is
+      permuted_pmgc.addMultilocusGenotype(*mg, indiv_grp);
+      continue;
+    }
+
+    MultilocusGenotype tmp_mg(loc_num);
+    for (size_t j = 0; j < loc_num; j++)
+    {
+      if (mg->isMonolocusGenotypeMissing(j))
+        continue;
+      const size_t nb_alls = mg->getMonolocusGenotype(j).getAlleleIndex().size();
+      if (nb_alls == 1)
+      {
+        tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
+      }
+      else if (nb_alls == 2)
+      {
+        // Two statements: the increments of k[j] must be sequenced.
+        const size_t first_allele = alleles[j][k[j]++];
+        const size_t second_allele = alleles[j][k[j]++];
+        tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(first_allele, second_allele));
+      }
+    }
+    permuted_pmgc.addMultilocusGenotype(tmp_mg, indiv_grp);
+  }
+
+  // update groups names
+  set<size_t> grp_ids = pmgc.getAllGroupsIds();
+  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
+  {
+    size_t id = *it;
+    string name = pmgc.getGroupName(id);
+    permuted_pmgc.setGroupName(id, name);
+  }
+
+  return permuted_pmgc;
+}
+
+/******************************************************************************/
+
+/*
+ * Builds a new container in which, for each of the given groups and locus
+ * by locus, the alleles are randomly redistributed among the individuals
+ * of that group only.
+ *
+ * pmgc:   the source container (its genotypes must be aligned, see
+ *         getNumberOfLoci()).
+ * groups: ids of the groups to permute.
+ *
+ * Layout of the result: first the individuals whose group is not in
+ * 'groups', copied unchanged in their original order, then the rebuilt
+ * individuals of each selected group, in increasing group id order.
+ * Within a group each individual keeps, at each locus, the number of
+ * alleles (1 or 2) it had; a missing locus stays missing. Group names are
+ * copied from pmgc.
+ *
+ * Fixes over the previous implementation:
+ *  - the two alleles of a bi-allelic genotype are drawn in two separate
+ *    statements; BiAlleleMonolocusGenotype(alleles[j][k[j]++],
+ *    alleles[j][k[j]++]) modified k[j] twice without sequencing, which is
+ *    undefined behaviour;
+ *  - the allele pool is now local to each group; it used to accumulate over
+ *    groups while the read index restarted from 0, so later groups were
+ *    rebuilt from earlier groups' alleles;
+ *  - the per-individual allele count now records 0 for a missing genotype;
+ *    missing genotypes used to be skipped, so indexing that table by
+ *    individual was misaligned and could read past its end;
+ *  - individuals outside 'groups' are copied exactly once; they used to be
+ *    copied once per selected group (and never when 'groups' was empty).
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
+{
+  PolymorphismMultiGContainer permuted_pmgc;
+  const size_t loc_num = pmgc.getNumberOfLoci();
+
+  // Individuals that are not concerned by the permutation: insert as is
+  for (size_t i = 0; i < pmgc.size(); i++)
+  {
+    const size_t indiv_grp = pmgc.getGroupId(i);
+    if (groups.find(indiv_grp) == groups.end())
+      permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp);
+  }
+
+  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group
+  {
+    // alleles[j]: alleles of group g at locus j
+    // nb_alleles_for_inds[j][ind]: number of alleles of the ind-th
+    // individual of group g at locus j (0 when the genotype is missing)
+    vector<vector<size_t> > alleles(loc_num);
+    vector<vector<size_t> > nb_alleles_for_inds(loc_num);
+    size_t nb_ind_in_group = 0;
+
+    // Get all the alleles to permut
+    for (size_t i = 0; i < pmgc.size(); i++)
+    {
+      if (pmgc.getGroupId(i) != *g)
+        continue;
+      nb_ind_in_group++;
+      const MultilocusGenotype* mg = pmgc.getMultilocusGenotype(i);
+      for (size_t j = 0; j < loc_num; j++)
+      {
+        if (mg->isMonolocusGenotypeMissing(j))
+        {
+          nb_alleles_for_inds[j].push_back(0);
+          continue;
+        }
+        const MonolocusGenotype& geno = mg->getMonolocusGenotype(j);
+        const size_t nb_alls = geno.getAlleleIndex().size();
+        nb_alleles_for_inds[j].push_back(nb_alls);
+        for (size_t a = 0; a < nb_alls; a++)
+        {
+          alleles[j].push_back(geno.getAlleleIndex()[a]);
+        }
+      }
+    }
+
+    if (nb_ind_in_group == 0)
+      continue;
+
+    // Permut the alleles, each locus independently
+    for (size_t j = 0; j < loc_num; j++)
+    {
+      std::random_shuffle(alleles[j].begin(), alleles[j].end());
+    }
+
+    // Build the new individuals of the group; k[j] is the next unused
+    // allele of the shuffled pool of locus j
+    vector<size_t> k(loc_num, 0);
+    for (size_t ind = 0; ind < nb_ind_in_group; ind++)
+    {
+      MultilocusGenotype tmp_mg(loc_num);
+      for (size_t j = 0; j < loc_num; j++)
+      {
+        const size_t nb_alls = nb_alleles_for_inds[j][ind];
+        if (nb_alls == 1)
+        {
+          tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
+        }
+        else if (nb_alls == 2)
+        {
+          // Two statements: the increments of k[j] must be sequenced.
+          const size_t first_allele = alleles[j][k[j]++];
+          const size_t second_allele = alleles[j][k[j]++];
+          tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(first_allele, second_allele));
+        }
+      }
+      permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g));
+    }
+  }
+
+  // update groups names
+  set<size_t> grp_ids = pmgc.getAllGroupsIds();
+  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
+  {
+    size_t id = *it;
+    string name = pmgc.getGroupName(id);
+    permuted_pmgc.setGroupName(id, name);
+  }
+
+  return permuted_pmgc;
+}
+
+/******************************************************************************/
+
+/*
+ * Builds a new container holding only the individuals of pmgc that belong
+ * to one of the given groups. Individuals are emitted group by group, in
+ * increasing group id order, keeping their relative order inside a group.
+ * The names of the extracted groups are copied from pmgc.
+ */
+PolymorphismMultiGContainer PolymorphismMultiGContainerTools::extractGroups(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
+{
+  PolymorphismMultiGContainer sub_pmgc;
+  const size_t nb_ind = pmgc.size();
+
+  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); ++g)
+  {
+    const size_t wanted = *g;
+    // 'wanted' comes from 'groups', so comparing with it is enough to know
+    // that the individual belongs to a selected group.
+    for (size_t ind = 0; ind < nb_ind; ++ind)
+    {
+      const size_t indiv_grp = pmgc.getGroupId(ind);
+      if (indiv_grp == wanted)
+        sub_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(ind)), indiv_grp);
+    }
+  }
+
+  // Carry over the names of the groups actually extracted
+  const set<size_t> kept_ids = sub_pmgc.getAllGroupsIds();
+  for (set<size_t>::const_iterator id = kept_ids.begin(); id != kept_ids.end(); ++id)
+  {
+    sub_pmgc.setGroupName(*id, pmgc.getGroupName(*id));
+  }
+
+  return sub_pmgc;
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/PolymorphismMultiGContainerTools.h b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.h
new file mode 100644
index 0000000..eef3ba8
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismMultiGContainerTools.h
@@ -0,0 +1,123 @@
+//
+// File PolymorphismMultiGContainerTools.h
+// Authors : Sylvain Gaillard
+// Khalid Belkhir
+// Last modification : june 15 2006
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _POLYMORPHISMMULTIGCONTAINERTOOLS_H_
+#define _POLYMORPHISMMULTIGCONTAINERTOOLS_H_
+
+// From the STL
+#include <set>
+
+// From the PopGenLib library
+#include "PolymorphismMultiGContainer.h"
+
+#include <Bpp/Numeric/Random/RandomTools.h>
+
+namespace bpp
+{
+/**
+ * @brief Tools for PolymorphismMultiGContainer.
+ *
+ * Provides static methods for permutations and for extracting groups.
+ *
+ * @author Sylvain Gaillard
+ */
+class PolymorphismMultiGContainerTools
+{
+public:
+ /**
+ * @brief Permute the MultilocusGenotype in the whole PolymorphismMultiGContainer.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to permute.
+ * @return A permuted PolymorphismMultiGContainer.
+ */
+ static PolymorphismMultiGContainer permutMultiG(const PolymorphismMultiGContainer& pmgc);
+
+ /**
+ * @brief Permute the MonolocusGenotype.
+ *
+ * Permute the MonolocusGenotypes in one or several groups breaking
+ * the links between them.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to permute.
+ * @param groups The groups ids between which the MonolocusGenotypes will be permuted.
+ * @return A permuted PolymorphismMultiGContainer.
+ */
+ static PolymorphismMultiGContainer permutMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups);
+
+ /**
+ * @brief Permute the MonolocusGenotype between individuals in the same group.
+ *
+ * Permute the MonolocusGenotypes for a set of groups. The individuals of the other groups
+ * are kept intact.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to permute.
+ * @param groups The groups ids for which the MonolocusGenotypes will be permuted.
+ * @return A permuted PolymorphismMultiGContainer.
+ */
+ static PolymorphismMultiGContainer permutIntraGroupMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups);
+
+ /**
+ * @brief Permute the Alleles.
+ *
+ * Permute the alleles in one or several groups breaking
+ * the links between them.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to permute.
+ * @param groups The groups ids between which the MonolocusGenotypes will be permuted.
+ * @return A permuted PolymorphismMultiGContainer.
+ */
+ static PolymorphismMultiGContainer permutAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups);
+
+ /**
+ * @brief Permute the Alleles between individuals in the same group.
+ *
+ * Permute the alleles in one or several groups.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to permute.
+ * @param groups The groups ids between which the MonolocusGenotypes will be permuted.
+ * @return A permuted PolymorphismMultiGContainer.
+ */
+ static PolymorphismMultiGContainer permutIntraGroupAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups);
+
+ /**
+ * @brief Extract the individuals belonging to a set of groups.
+ *
+ * The MultilocusGenotypes are copied group by group (in the iteration order
+ * of the set) and, within a group, in their order in pmgc. The names of the
+ * groups present in the result are copied from pmgc.
+ *
+ * @param pmgc The PolymorphismMultiGContainer to extract from.
+ * @param groups The ids of the groups to keep.
+ * @return A new PolymorphismMultiGContainer restricted to the given groups.
+ */
+ static PolymorphismMultiGContainer extractGroups(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups);
+};
+} // end of namespace bpp;
+
+#endif // _POLYMORPHISMMULTIGCONTAINERTOOLS_H_
+
diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp b/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp
new file mode 100644
index 0000000..2469348
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismSequenceContainer.cpp
@@ -0,0 +1,447 @@
+//
+// File: PolymorphismSequenceContainer.cpp
+// Created by: Eric Bazin
+// Sylvain Gaillard
+// Created on: Wednesday August 04 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "PolymorphismSequenceContainer.h"
+
+using namespace bpp;
+using namespace std;
+
+/******************************************************************************/
+
+// Empty container: no sequence, hence empty per-sequence vectors.
+PolymorphismSequenceContainer::PolymorphismSequenceContainer(const Alphabet* alpha) :
+  VectorSiteContainer(alpha),
+  ingroup_(),
+  count_(),
+  group_()
+{}
+
+/******************************************************************************/
+
+// Container of a given size: the per-sequence vectors get 'size'
+// value-initialized entries (false flags, zero counts, zero group ids).
+PolymorphismSequenceContainer::PolymorphismSequenceContainer(size_t size, const Alphabet* alpha) :
+  VectorSiteContainer(size, alpha),
+  ingroup_(size),
+  count_(size),
+  group_(size)
+{}
+
+/******************************************************************************/
+
+// Copy the sequences of an OrderedSequenceContainer; every sequence starts
+// as an ingroup member with a count of 1 and a group id of 1.
+PolymorphismSequenceContainer::PolymorphismSequenceContainer(const OrderedSequenceContainer& sc) :
+  VectorSiteContainer(sc),
+  ingroup_(sc.getNumberOfSequences(), true),
+  count_(sc.getNumberOfSequences(), 1),
+  group_(sc.getNumberOfSequences(), 1)
+{}
+
+/******************************************************************************/
+
+// Copy the sequences of a SiteContainer; every sequence starts as an
+// ingroup member with a count of 1 and a group id of 1.
+PolymorphismSequenceContainer::PolymorphismSequenceContainer(const SiteContainer& sc) :
+  VectorSiteContainer(sc),
+  ingroup_(sc.getNumberOfSequences(), true),
+  count_(sc.getNumberOfSequences(), 1),
+  group_(sc.getNumberOfSequences(), 1)
+{}
+
+/******************************************************************************/
+
+// Copy constructor: the per-sequence state is read back from the source
+// container through its public accessors, one sequence at a time.
+PolymorphismSequenceContainer::PolymorphismSequenceContainer(const PolymorphismSequenceContainer& psc) :
+  VectorSiteContainer(psc),
+  ingroup_(psc.getNumberOfSequences()),
+  count_(psc.getNumberOfSequences()),
+  group_(psc.getNumberOfSequences())
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  for (size_t pos = 0; pos != nbSeq; ++pos)
+  {
+    count_[pos] = psc.getSequenceCount(pos);
+    ingroup_[pos] = psc.isIngroupMember(pos);
+    group_[pos] = psc.getGroupId(pos);
+  }
+}
+
+/******************************************************************************/
+
+// Assignment: copy the base container first, then resize the per-sequence
+// vectors and refill them from the source through its accessors.
+PolymorphismSequenceContainer& PolymorphismSequenceContainer::operator=(const PolymorphismSequenceContainer& psc)
+{
+  VectorSiteContainer::operator=(psc);
+
+  const size_t total = psc.getNumberOfSequences();
+  count_.resize(total);
+  ingroup_.resize(total);
+  group_.resize(total);
+
+  size_t pos = 0;
+  while (pos < total)
+  {
+    count_[pos] = psc.getSequenceCount(pos);
+    ingroup_[pos] = psc.isIngroupMember(pos);
+    group_[pos] = psc.getGroupId(pos);
+    ++pos;
+  }
+  return *this;
+}
+
+/******************************************************************************/
+
+// ** Class destructor: *******************************************************/
+
+// Empties the container (sequences and per-sequence vectors) on destruction.
+PolymorphismSequenceContainer::~PolymorphismSequenceContainer()
+{
+  // Inside a destructor the call resolves to this class' clear() anyway;
+  // the qualification only makes that explicit.
+  PolymorphismSequenceContainer::clear();
+}
+
+/******************************************************************************/
+
+// ** Other methods: **********************************************************/
+
+// Detach the sequence at 'index' and hand it back to the caller; the matching
+// entries of the per-sequence vectors are dropped as well.
+Sequence* PolymorphismSequenceContainer::removeSequence(size_t index) throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::removeSequence: index out of bounds.", index, 0, nbSeq);
+
+  group_.erase(group_.begin() + index);
+  ingroup_.erase(ingroup_.begin() + index);
+  count_.erase(count_.begin() + index);
+
+  Sequence* removed = VectorSiteContainer::removeSequence(index);
+  return removed;
+}
+
+/******************************************************************************/
+
+// Name-based variant: resolve the name to a position, then remove by index.
+// A failed lookup is reported with this method's own message.
+Sequence* PolymorphismSequenceContainer::removeSequence(const std::string& name) throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    return removeSequence(pos);
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::removeSequence.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Remove the sequence at 'index' from the container and destroy it.
+void PolymorphismSequenceContainer::deleteSequence(size_t index) throw (IndexOutOfBoundsException)
+{
+  try
+  {
+    Sequence* removed = removeSequence(index);
+    delete removed;
+  }
+  catch (IndexOutOfBoundsException&)
+  {
+    // Re-issue the error under this method's name.
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::deleteSequence.", index, 0, getNumberOfSequences());
+  }
+}
+
+/******************************************************************************/
+
+// Remove the sequence called 'name' from the container and destroy it.
+void PolymorphismSequenceContainer::deleteSequence(const std::string& name) throw (SequenceNotFoundException)
+{
+  try
+  {
+    Sequence* removed = removeSequence(name);
+    delete removed;
+  }
+  catch (SequenceNotFoundException&)
+  {
+    // Re-issue the error under this method's name.
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::deleteSequence.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Append a sequence to the container together with its per-sequence state:
+// 'effectif' is stored as the sequence count, the sequence is flagged as an
+// ingroup member and it is assigned to group 0.
+//
+// Any exception raised by VectorSiteContainer::addSequence propagates to the
+// caller with its original (dynamic) type, and the per-sequence vectors are
+// left untouched in that case.
+void PolymorphismSequenceContainer::addSequence(const Sequence& sequence, size_t effectif, bool checkNames) throw (Exception)
+{
+  // No try/catch here on purpose: catching 'Exception&' and doing 'throw e;'
+  // would copy the exception as a plain Exception and lose the derived type.
+  VectorSiteContainer::addSequence(sequence, checkNames);
+  count_.push_back(effectif);
+  ingroup_.push_back(true);
+  group_.push_back(0);
+}
+
+/******************************************************************************/
+
+// Drop every sequence along with all the per-sequence bookkeeping.
+void PolymorphismSequenceContainer::clear()
+{
+  VectorSiteContainer::clear();
+  group_.clear();
+  ingroup_.clear();
+  count_.clear();
+}
+
+/******************************************************************************/
+
+// Group id of the sequence at 'index'.
+size_t PolymorphismSequenceContainer::getGroupId(size_t index) const throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index < nbSeq)
+    return group_[index];
+  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getGroupId: index out of bounds.", index, 0, nbSeq);
+}
+
+/******************************************************************************/
+
+// Group id of the sequence called 'name'.
+size_t PolymorphismSequenceContainer::getGroupId(const std::string& name) const throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    return group_[pos];
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::getGroupId.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Distinct group ids currently assigned to at least one sequence.
+std::set<size_t> PolymorphismSequenceContainer::getAllGroupsIds() const
+{
+  // The range constructor of std::set discards the duplicates.
+  return set<size_t>(group_.begin(), group_.end());
+}
+
+/******************************************************************************/
+
+// Assign the sequence at 'index' to group 'group_id'.
+void PolymorphismSequenceContainer::setGroupId(size_t index, size_t group_id) throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+  {
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setGroupId: index out of bounds.", index, 0, nbSeq);
+  }
+  group_[index] = group_id;
+}
+
+/******************************************************************************/
+
+// Assign the sequence called 'name' to group 'group_id'.
+void PolymorphismSequenceContainer::setGroupId(const std::string& name, size_t group_id) throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    group_[pos] = group_id;
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::setGroupId.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Number of distinct group ids in use.
+size_t PolymorphismSequenceContainer::getNumberOfGroups() const
+{
+  const set<size_t> ids = getAllGroupsIds();
+  return ids.size();
+}
+
+/******************************************************************************/
+
+// Ingroup flag of the sequence at 'index'.
+bool PolymorphismSequenceContainer::isIngroupMember(size_t index) const throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index < nbSeq)
+    return ingroup_[index];
+  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::isIngroupMember: index out of bounds.", index, 0, nbSeq);
+}
+
+/******************************************************************************/
+
+// Ingroup flag of the sequence called 'name'.
+bool PolymorphismSequenceContainer::isIngroupMember(const std::string& name) const throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    return ingroup_[pos];
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::isIngroupMember.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Flag the sequence at 'index' as an ingroup member.
+void PolymorphismSequenceContainer::setAsIngroupMember(size_t index) throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+  {
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsIngroupMember.", index, 0, nbSeq);
+  }
+  ingroup_[index] = true;
+}
+
+/******************************************************************************/
+
+// Flag the sequence called 'name' as an ingroup member.
+void PolymorphismSequenceContainer::setAsIngroupMember(const std::string& name) throw (SequenceNotFoundException)
+{
+  try
+  {
+    ingroup_[getSequencePosition(name)] = true;
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsIngroupMember.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Flag the sequence at 'index' as an outgroup member (ingroup flag cleared).
+void PolymorphismSequenceContainer::setAsOutgroupMember(size_t index) throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+  {
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsOutgroupMember.", index, 0, nbSeq);
+  }
+  ingroup_[index] = false;
+}
+
+/******************************************************************************/
+
+// Flag the sequence called 'name' as an outgroup member (ingroup flag cleared).
+void PolymorphismSequenceContainer::setAsOutgroupMember(const std::string& name) throw (SequenceNotFoundException)
+{
+  try
+  {
+    ingroup_[getSequencePosition(name)] = false;
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsOutgroupMember.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Set the count of the sequence at 'index'; a count of zero is rejected.
+void PolymorphismSequenceContainer::setSequenceCount(size_t index, size_t count) throw (Exception)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+  {
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setSequenceCount.", index, 0, nbSeq);
+  }
+  // 'count' is unsigned, so "< 1" can only mean zero.
+  if (count == 0)
+  {
+    throw BadIntegerException("PolymorphismSequenceContainer::setSequenceCount: count can't be < 1.", static_cast<int>(count));
+  }
+  count_[index] = count;
+}
+
+/******************************************************************************/
+
+// Set the count of the sequence called 'name'.
+//
+// A failed name lookup is reported as a SequenceNotFoundException carrying
+// this method's name. Any other exception raised by the index-based overload
+// (e.g. BadIntegerException for a zero count) propagates unchanged.
+void PolymorphismSequenceContainer::setSequenceCount(const std::string& name, size_t count) throw (Exception)
+{
+  try
+  {
+    setSequenceCount(getSequencePosition(name), count);
+  }
+  // No handler for BadIntegerException: catching it only to 'throw bie;'
+  // re-threw a copy; letting it pass through keeps the original object.
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::setSequenceCount.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Add one to the count of the sequence at 'index'.
+void PolymorphismSequenceContainer::incrementSequenceCount(size_t index) throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index >= nbSeq)
+  {
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::incrementSequenceCount.", index, 0, nbSeq);
+  }
+  count_[index] += 1;
+}
+
+/******************************************************************************/
+
+// Add one to the count of the sequence called 'name'.
+void PolymorphismSequenceContainer::incrementSequenceCount(const std::string& name) throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    incrementSequenceCount(pos);
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::incrementSequenceCount.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Subtract one from the count of the sequence at 'index'.
+//
+// Throws IndexOutOfBoundsException when 'index' is not a valid position, and
+// BadIntegerException when the decrement would bring the count below 1. The
+// value reported in the BadIntegerException is the count the decrement would
+// have produced (0, or -1 for a sequence whose count is already 0).
+void PolymorphismSequenceContainer::decrementSequenceCount(size_t index) throw (Exception)
+{
+  if (index >= getNumberOfSequences())
+    throw IndexOutOfBoundsException("PolymorphismSequenceContainer::decrementSequenceCount.", index, 0, getNumberOfSequences());
+  // count_ holds unsigned values: compare before subtracting. Testing
+  // "count_[index] - 1 < 1" wraps around when the count is 0 (which the sized
+  // constructor produces) and would let the decrement underflow.
+  if (count_[index] <= 1)
+    throw BadIntegerException("PolymorphismSequenceContainer::decrementSequenceCount: count can't be < 1.", static_cast<int>(count_[index]) - 1);
+  count_[index]--;
+}
+
+/******************************************************************************/
+
+// Subtract one from the count of the sequence called 'name'.
+//
+// A failed name lookup is reported as a SequenceNotFoundException carrying
+// this method's name. Any other exception raised by the index-based overload
+// (e.g. BadIntegerException) propagates unchanged.
+void PolymorphismSequenceContainer::decrementSequenceCount(const std::string& name) throw (Exception)
+{
+  try
+  {
+    decrementSequenceCount(getSequencePosition(name));
+  }
+  // No handler for BadIntegerException: catching it only to 'throw bie;'
+  // re-threw a copy; letting it pass through keeps the original object.
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::decrementSequenceCount.", name);
+  }
+}
+
+/******************************************************************************/
+
+// Count stored for the sequence at 'index'.
+size_t PolymorphismSequenceContainer::getSequenceCount(size_t index) const throw (IndexOutOfBoundsException)
+{
+  const size_t nbSeq = getNumberOfSequences();
+  if (index < nbSeq)
+    return count_[index];
+  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getSequenceCount.", index, 0, nbSeq);
+}
+
+/******************************************************************************/
+
+// Count stored for the sequence called 'name'.
+size_t PolymorphismSequenceContainer::getSequenceCount(const std::string& name) const throw (SequenceNotFoundException)
+{
+  try
+  {
+    const size_t pos = getSequencePosition(name);
+    return getSequenceCount(pos);
+  }
+  catch (SequenceNotFoundException&)
+  {
+    throw SequenceNotFoundException("PolymorphismSequenceContainer::getSequenceCount.", name);
+  }
+}
+
+/******************************************************************************/
+
diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainer.h b/src/Bpp/PopGen/PolymorphismSequenceContainer.h
new file mode 100644
index 0000000..5aeec8d
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismSequenceContainer.h
@@ -0,0 +1,342 @@
+//
+// File: PolymorphismSequenceContainer.h
+// Authors: Eric Bazin
+// Sylvain Gaillard
+// Created on: Wednesday August 04 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _POLYMORPHISMSEQUENCECONTAINER_H_
+#define _POLYMORPHISMSEQUENCECONTAINER_H_
+
+#include <set>
+#include <string>
+
+#include <Bpp/Clonable.h>
+#include <Bpp/Text/StringTokenizer.h>
+#include <Bpp/Text/TextTools.h>
+
+#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/Container/SequenceContainerTools.h>
+
+/**
+ * @mainpage
+ *
+ * @par
+ * The PopGenLib library provides classes for population genetics analysis.
+ * It makes intensive use of the SeqLib library, and adds a dedicated container
+ * named bpp::PolymorphismSequenceContainer, which associates frequencies to the
+ * sequences in the set. The bpp::PolymorphismSequenceContainerTools and
+ * bpp::SequenceStatistics static classes provide several tools for data analysis,
+ * including diversity indices and positive selection tests.
+ *
+ * @section dataset Population and sample data storage and manipulation
+ *
+ * @par
+ * PopGenLib library provides data structure for handling sample and data sets
+ * for population genetics.
+ * These objects are embedded in the bpp::DataSet object which is a container of bpp::Group
+ * of bpp::Individual.
+ * Each bpp::Individual can store bpp::Sequence data or allelic data with the dedicated
+ * classes bpp::MultilocusGenotype.
+ *
+ * @section genetics Population genetics data and statistics
+ *
+ * @par
+ * To compute statistics on data, two containers families are provided, one for sequences
+ * (bpp::PolymorphismSequenceContainer) and the other for allelic data (bpp::PolymorphismMultiGContainer).
+ * Static tools class for both families are provided to compute several common or less
+ * common statistics.
+ *
+ * @section statistics Statistics overview
+ *
+ * @par heterozygosity
+ * @par watterson75 Diversity estimator Theta of Watterson
+ * @par tajima83 Diversity estimator Theta of Tajima
+ * @par DVH Haplotype diversity of Depaulis and Veuille
+ * @par D Tajima's D test
+ */
+
+namespace bpp
+{
+/**
+ * @brief The PolymorphismSequenceContainer class.
+ *
+ * This is a VectorSiteContainer which additionally stores, for each sequence,
+ * a count ("effectif"), an ingroup/outgroup flag and a group identifier.
+ *
+ * @author Sylvain Gaillard
+ */
+class PolymorphismSequenceContainer :
+ public VectorSiteContainer
+{
+private:
+ // The three vectors below hold one entry per sequence, at the sequence's position.
+ std::vector<bool> ingroup_; // true: ingroup member, false: outgroup member
+ std::vector<size_t> count_; // count (effectif) of the sequence
+ std::vector<size_t> group_; // group identifier of the sequence
+
+public:
+ // Constructors and destructor
+ /**
+ * @brief Build a new empty PolymorphismSequenceContainer.
+ *
+ * @param alpha The alphabet passed to the underlying VectorSiteContainer.
+ */
+ PolymorphismSequenceContainer(const Alphabet* alpha);
+
+ /**
+ * @brief Build a new empty PolymorphismSequenceContainer of given size.
+ *
+ * The per-sequence data are value-initialized: ingroup flag false, count 0, group id 0.
+ *
+ * @param size The size passed to the underlying VectorSiteContainer.
+ * @param alpha The alphabet passed to the underlying VectorSiteContainer.
+ */
+ PolymorphismSequenceContainer(size_t size, const Alphabet* alpha);
+
+ /**
+ * @brief Build a PolymorphismSequenceContainer by copying data from an OrderedSequenceContainer.
+ *
+ * Every sequence is set as ingroup member, with a count of 1 and a group id of 1.
+ */
+ PolymorphismSequenceContainer(const OrderedSequenceContainer& sc);
+
+ /**
+ * @brief Build a PolymorphismSequenceContainer by copying data from a SiteContainer.
+ *
+ * Every sequence is set as ingroup member, with a count of 1 and a group id of 1.
+ */
+ PolymorphismSequenceContainer(const SiteContainer& sc);
+
+ /**
+ * @brief Copy constructor.
+ */
+ PolymorphismSequenceContainer(const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Operator= : copy operator.
+ */
+ PolymorphismSequenceContainer& operator=(const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Destroy a PolymorphismSequenceContainer.
+ */
+ virtual ~PolymorphismSequenceContainer();
+
+ /**
+ * @brief Clone a PolymorphismSequenceContainer.
+ *
+ * @return A pointer to a newly allocated copy of this container.
+ */
+ PolymorphismSequenceContainer* clone() const
+ {
+ return new PolymorphismSequenceContainer(*this);
+ }
+
+public:
+ // Other methods
+ /**
+ * @brief Remove a sequence by index and return a pointer to this removed sequence.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences.
+ */
+ Sequence* removeSequence(size_t index) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Remove a sequence by name and return a pointer to this removed sequence.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Delete a sequence by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences.
+ */
+ void deleteSequence(size_t index) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Delete a sequence by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ void deleteSequence(const std::string& name) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Add a sequence to the container.
+ *
+ * The new sequence is set as ingroup member and assigned to group 0.
+ *
+ * @param sequence The sequence to add.
+ * @param effectif The count stored for the sequence.
+ * @param checkNames Forwarded to VectorSiteContainer::addSequence.
+ * @throw AlphabetMismatchException if the sequence's alphabet doesn't match the container's alphabet.
+ * @throw SequenceException if the sequence's size doesn't match the sequence's size of the container.
+ * @throw SequenceException if the sequence's name already exists in the container.
+ */
+ void addSequence(const Sequence& sequence, size_t effectif = 1, bool checkNames = true) throw (Exception);
+
+ /**
+ * @brief Clear the container of all its sequences.
+ */
+ void clear();
+
+ /**
+ * @brief Get the group identifier of the sequence.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ size_t getGroupId(size_t index) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the group identifier of a sequence.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ size_t getGroupId(const std::string& name) const throw (SequenceNotFoundException);
+
+ /**
+ * @brief Get all the groups identifiers.
+ *
+ * @return The set of distinct group identifiers assigned to the sequences.
+ */
+ std::set<size_t> getAllGroupsIds() const;
+
+ /**
+ * @brief Set the group identifier of a sequence.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ void setGroupId(size_t index, size_t group_id) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set the group identifier of a sequence.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ void setGroupId(const std::string& name, size_t group_id) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Get the number of groups.
+ *
+ * @return The number of distinct group identifiers assigned to the sequences.
+ */
+ size_t getNumberOfGroups() const;
+
+ /**
+ * @brief Tell if the sequence is ingroup by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ bool isIngroupMember(size_t index) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Tell if a sequence is ingroup by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ bool isIngroupMember(const std::string& name) const throw (SequenceNotFoundException);
+
+ /**
+ * @brief Set a sequence as ingroup member by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ void setAsIngroupMember(size_t index) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set a sequence as ingroup member by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ void setAsIngroupMember(const std::string& name) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Set a sequence as outgroup member by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ void setAsOutgroupMember(size_t index) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Set a sequence as outgroup member by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ void setAsOutgroupMember(const std::string& name) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Set the count of a sequence by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0.
+ */
+ void setSequenceCount(size_t index, size_t count) throw (Exception);
+
+ /**
+ * @brief Set the count of a sequence by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0.
+ */
+ void setSequenceCount(const std::string& name, size_t count) throw (Exception);
+
+ /**
+ * @brief Add 1 to the sequence count.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ void incrementSequenceCount(size_t index) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Add 1 to the sequence count.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ void incrementSequenceCount(const std::string& name) throw (SequenceNotFoundException);
+
+ /**
+ * @brief Subtract 1 from the sequence count.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0.
+ */
+ void decrementSequenceCount(size_t index) throw (Exception);
+
+ /**
+ * @brief Subtract 1 from the sequence count.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ * @throw BadIntegerException if count < 1 ... use deleteSequence instead of setting the count to 0.
+ */
+ void decrementSequenceCount(const std::string& name) throw (Exception);
+
+ /**
+ * @brief Get the count of a sequence by index.
+ *
+ * @throw IndexOutOfBoundsException if index exceeds the number of sequences in the container.
+ */
+ size_t getSequenceCount(size_t index) const throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get the count of a sequence by name.
+ *
+ * @throw SequenceNotFoundException if name is not found among the sequences' names.
+ */
+ size_t getSequenceCount(const std::string& name) const throw (SequenceNotFoundException);
+};
+} // end of namespace bpp;
+
+#endif // _POLYMORPHISMSEQUENCECONTAINER_H_
+
diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp
new file mode 100644
index 0000000..1193ac3
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.cpp
@@ -0,0 +1,582 @@
+//
+// File: PolymorphismSequenceContainerTools.cpp
+// Authors: Eric Bazin
+// Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "PolymorphismSequenceContainerTools.h"
+
+using namespace bpp;
+using namespace std;
+
+// Empty destructor: the class declares only static helper functions and no
+// data members, so there is nothing to release.
+PolymorphismSequenceContainerTools::~PolymorphismSequenceContainerTools() {}
+
+/******************************************************************************/
+
+// Read a Mase+ file and build a PolymorphismSequenceContainer from it.
+//
+// Every sequence selection of the Mase header whose name starts with
+// "OUTGROUP" is used to flag the listed sequences as outgroup members.
+//
+// path:  path to the Mase+ file.
+// alpha: alphabet handed to the Mase reader.
+// Returns a newly allocated container; the caller is responsible for deleting it.
+// Throws Exception (or a subclass) if reading fails or if a selection refers to
+// an unknown sequence. Exceptions are propagated with their original dynamic
+// type (re-thrown with `throw;`, never copied), and the temporary containers
+// are released on every error path.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::read(const std::string& path, const Alphabet* alpha) throw (Exception)
+{
+  Mase ms;
+  // If the reader throws, nothing has been allocated on our side yet, so the
+  // exception can simply propagate.
+  const OrderedSequenceContainer* seqc = dynamic_cast<OrderedSequenceContainer*>(ms.readSequences(path, alpha ));
+  if (seqc == 0)
+    throw Exception("PolymorphismSequenceContainerTools::read: the sequences read are not stored in an ordered container.");
+
+  PolymorphismSequenceContainer* psc = 0;
+  try
+  {
+    psc = new PolymorphismSequenceContainer(*seqc);
+    Comments maseFileHeader = seqc->getGeneralComments();
+    // The raw container is no longer needed once copied.
+    delete seqc;
+    seqc = 0;
+
+    map<string, size_t> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
+    for (map<string, size_t>::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++)
+    {
+      const string& key = mi->first;
+      if (key.compare(0, 8, "OUTGROUP") != 0)
+        continue;
+      SequenceSelection ss = MaseTools::getSequenceSet(maseFileHeader, key);
+      for (size_t i = 0; i != ss.size(); i++)
+      {
+        psc->setAsOutgroupMember(ss[i]);
+      }
+    }
+  }
+  catch (...)
+  {
+    // Deleting a null pointer is a no-op, so both deletes are always safe.
+    delete seqc;
+    delete psc;
+    throw;
+  }
+  return psc;
+}
+
+/******************************************************************************/
+
+// Build a copy of psc that contains only its ingroup sequences.
+// Returns a newly allocated container owned by the caller.
+// Throws Exception when psc holds no ingroup sequence at all.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractIngroup (const PolymorphismSequenceContainer& psc) throw (Exception)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  PolymorphismSequenceContainer* ingroup = dynamic_cast<PolymorphismSequenceContainer*>(psc.clone());
+
+  // Collect, in increasing order, the positions of the sequences to discard.
+  SequenceSelection toRemove;
+  for (size_t pos = 0; pos < nbSeq; ++pos)
+  {
+    if (psc.isIngroupMember(pos))
+      continue;
+    toRemove.push_back(pos);
+  }
+
+  if (toRemove.size() == nbSeq)
+  {
+    delete ingroup;
+    throw Exception("PolymorphismSequenceContainerTools::extractIngroup: no Ingroup sequences found.");
+  }
+
+  // Delete from the highest position down so remaining positions stay valid.
+  size_t k = toRemove.size();
+  while (k > 0)
+  {
+    --k;
+    ingroup->deleteSequence(toRemove[k]);
+  }
+  return ingroup;
+}
+
+/******************************************************************************/
+
+// Build a copy of psc that contains only its outgroup sequences.
+// Returns a newly allocated container owned by the caller.
+// Throws Exception when every sequence of psc is an ingroup member.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractOutgroup(const PolymorphismSequenceContainer& psc) throw (Exception)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  PolymorphismSequenceContainer* outgroup = dynamic_cast<PolymorphismSequenceContainer*>(psc.clone());
+
+  // Collect, in increasing order, the positions of the ingroup sequences.
+  SequenceSelection toRemove;
+  for (size_t pos = 0; pos < nbSeq; ++pos)
+  {
+    if (!psc.isIngroupMember(pos))
+      continue;
+    toRemove.push_back(pos);
+  }
+
+  if (toRemove.size() == nbSeq)
+  {
+    delete outgroup;
+    throw Exception("PolymorphismSequenceContainerTools::extractOutgroup: no Outgroup sequences found.");
+  }
+
+  // Delete from the highest position down so remaining positions stay valid.
+  size_t k = toRemove.size();
+  while (k > 0)
+  {
+    --k;
+    outgroup->deleteSequence(toRemove[k]);
+  }
+  return outgroup;
+}
+
+/******************************************************************************/
+
+// Build a copy of psc restricted to the sequences whose group id is group_id.
+// Returns a newly allocated container owned by the caller.
+// Throws GroupNotFoundException when no sequence carries group_id.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::extractGroup(const PolymorphismSequenceContainer& psc, size_t group_id) throw (Exception)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  PolymorphismSequenceContainer* group = dynamic_cast<PolymorphismSequenceContainer*>(psc.clone());
+
+  // Collect, in increasing order, the positions of sequences from other groups.
+  SequenceSelection toRemove;
+  for (size_t pos = 0; pos < nbSeq; ++pos)
+  {
+    if (psc.getGroupId(pos) == group_id)
+      continue;
+    toRemove.push_back(pos);
+  }
+
+  if (toRemove.size() == nbSeq)
+  {
+    delete group;
+    throw GroupNotFoundException("PolymorphismSequenceContainerTools::extractGroup: group_id not found.", group_id);
+  }
+
+  // Delete from the highest position down so remaining positions stay valid.
+  size_t k = toRemove.size();
+  while (k > 0)
+  {
+    --k;
+    group->deleteSequence(toRemove[k]);
+  }
+  return group;
+}
+
+/******************************************************************************/
+
+// Build a new container holding the sequences of psc listed in ss, in the
+// order given by ss.
+//
+// For each selected sequence the count, the ingroup/outgroup status and (for
+// outgroup members) the group id are copied from the *selected* source
+// position ss[i]; they were previously read at the loop index i, which mixed
+// up attributes whenever ss was not the identity selection 0..n-1.
+// The general comments of psc are copied to the result.
+//
+// Returns a newly allocated container owned by the caller. If any call throws,
+// the partially built container is released and the exception is re-thrown.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSelectedSequences(const PolymorphismSequenceContainer& psc, const SequenceSelection& ss)
+{
+  PolymorphismSequenceContainer* newpsc = new PolymorphismSequenceContainer(psc.getAlphabet());
+  try
+  {
+    for (size_t i = 0; i < ss.size(); i++)
+    {
+      const size_t src = ss[i]; // position in psc; i is the position in newpsc
+      newpsc->addSequence(psc.getSequence(src), psc.getSequenceCount(src), false);
+      if (psc.isIngroupMember(src))
+        newpsc->setAsIngroupMember(i);
+      else
+      {
+        newpsc->setAsOutgroupMember(i);
+        newpsc->setGroupId(i, psc.getGroupId(src));
+      }
+    }
+    newpsc->setGeneralComments(psc.getGeneralComments());
+  }
+  catch (...)
+  {
+    delete newpsc;
+    throw;
+  }
+  return newpsc;
+}
+
+/******************************************************************************/
+
+// Draw n sequence positions at random from psc (with or without replacement,
+// according to `replace`) and return the matching sequences in a new container
+// built by getSelectedSequences().
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::sample(const PolymorphismSequenceContainer& psc, size_t n, bool replace)
+{
+  // Pool of candidate positions: 0, 1, ..., nbSeq - 1.
+  const size_t nbSeq = psc.getNumberOfSequences();
+  vector<size_t> pool(nbSeq);
+  for (size_t k = 0; k < nbSeq; ++k)
+  {
+    pool[k] = k;
+  }
+
+  // The size of `picked` tells getSample how many positions to draw.
+  vector<size_t> picked(n);
+  RandomTools::getSample(pool, picked, replace);
+  return PolymorphismSequenceContainerTools::getSelectedSequences(psc, picked);
+}
+
+/******************************************************************************/
+
+// Build a new container with the same sequences (names, counts, ingroup or
+// outgroup status, group id of outgroup members) as psc, keeping only the
+// sites yielded by a NoGapSiteContainerIterator.
+// Returns a newly allocated container owned by the caller.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSitesWithoutGaps (const PolymorphismSequenceContainer& psc)
+{
+  vector<string> names = psc.getSequencesNames();
+  PolymorphismSequenceContainer* result = new PolymorphismSequenceContainer(psc.getNumberOfSequences(), psc.getAlphabet());
+  result->setSequencesNames(names, false);
+
+  // Mirror the per-sequence attributes of the source container.
+  const size_t nbSeq = psc.getNumberOfSequences();
+  for (size_t s = 0; s < nbSeq; ++s)
+  {
+    result->setSequenceCount(s, psc.getSequenceCount(s));
+    if (!psc.isIngroupMember(s))
+    {
+      result->setAsOutgroupMember(s);
+      result->setGroupId(s, psc.getGroupId(s));
+    }
+    else
+    {
+      result->setAsIngroupMember(s);
+    }
+  }
+
+  // Append the gap-free sites in alignment order.
+  for (NoGapSiteContainerIterator it(psc); it.hasMoreSites(); )
+  {
+    result->addSite(*it.nextSite());
+  }
+  return result;
+}
+
+/******************************************************************************/
+
+// Count the sites of psc that contain no gap.
+//
+// ingroup: when true, only the ingroup sequences (extractIngroup) are
+//          examined; otherwise all sequences are.
+// Throws Exception if ingroup is true and psc has no ingroup sequence.
+//
+// The temporary ingroup container is released on every path (it used to be
+// leaked), and the iterator lives on the stack so it cannot leak either.
+size_t PolymorphismSequenceContainerTools::getNumberOfNonGapSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception)
+{
+  size_t count = psc.getNumberOfSites();
+  // Null when all sequences are considered; owned by this function otherwise.
+  const PolymorphismSequenceContainer* npsc = ingroup ? extractIngroup(psc) : 0;
+  try
+  {
+    SimpleSiteContainerIterator ssi(npsc != 0 ? *npsc : psc);
+    while (ssi.hasMoreSites())
+    {
+      if (SiteTools::hasGap(*ssi.nextSite()))
+        count--;
+    }
+  }
+  catch (...)
+  {
+    delete npsc;
+    throw;
+  }
+  delete npsc;
+  return count;
+}
+
+/******************************************************************************/
+
+// Count the sites of psc for which SiteTools::isComplete() holds.
+//
+// ingroup: when true, only the ingroup sequences (extractIngroup) are
+//          examined; otherwise all sequences are.
+// Throws Exception if ingroup is true and psc has no ingroup sequence.
+//
+// The temporary ingroup container is released on every path (it used to be
+// leaked), and the iterator lives on the stack so it cannot leak either.
+size_t PolymorphismSequenceContainerTools::getNumberOfCompleteSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception)
+{
+  size_t count = psc.getNumberOfSites();
+  // Null when all sequences are considered; owned by this function otherwise.
+  const PolymorphismSequenceContainer* npsc = ingroup ? extractIngroup(psc) : 0;
+  try
+  {
+    SimpleSiteContainerIterator ssi(npsc != 0 ? *npsc : psc);
+    while (ssi.hasMoreSites())
+    {
+      if (!SiteTools::isComplete(*ssi.nextSite()))
+        count--;
+    }
+  }
+  catch (...)
+  {
+    delete npsc;
+    throw;
+  }
+  delete npsc;
+  return count;
+}
+
+/******************************************************************************/
+
+// Build a new container with the same sequences (names, counts, ingroup or
+// outgroup status, group id of outgroup members) as psc, keeping only the
+// sites yielded by a CompleteSiteContainerIterator.
+// Returns a newly allocated container owned by the caller.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getCompleteSites (const PolymorphismSequenceContainer& psc)
+{
+  vector<string> names = psc.getSequencesNames();
+  PolymorphismSequenceContainer* result = new PolymorphismSequenceContainer(psc.getNumberOfSequences(), psc.getAlphabet());
+  result->setSequencesNames(names, false);
+
+  // Mirror the per-sequence attributes of the source container.
+  const size_t nbSeq = psc.getNumberOfSequences();
+  for (size_t s = 0; s < nbSeq; ++s)
+  {
+    result->setSequenceCount(s, psc.getSequenceCount(s));
+    if (!psc.isIngroupMember(s))
+    {
+      result->setAsOutgroupMember(s);
+      result->setGroupId(s, psc.getGroupId(s));
+    }
+    else
+    {
+      result->setAsIngroupMember(s);
+    }
+  }
+
+  // Append the complete sites in alignment order.
+  for (CompleteSiteContainerIterator it(psc); it.hasMoreSites(); )
+  {
+    result->addSite(*it.nextSite());
+  }
+  return result;
+}
+
+/******************************************************************************/
+
+// Return a copy of psc from which the leading and trailing sites containing a
+// gap have been removed; gap sites inside the alignment are kept.
+//
+// Returns a newly allocated container owned by the caller.
+// If a site access throws (NOTE(review): presumably the case when every site
+// contains a gap and the copy runs empty - confirm against the container's
+// getSite contract), the copy is released before the exception is re-thrown;
+// it used to be leaked.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::excludeFlankingGap(const PolymorphismSequenceContainer& psc)
+{
+  PolymorphismSequenceContainer* psci = dynamic_cast<PolymorphismSequenceContainer*>(psc.clone());
+  try
+  {
+    // Strip the 5' flank.
+    while (SiteTools::hasGap(psci->getSite(0)))
+      psci->deleteSite(0);
+    // Strip the 3' flank: n always designates the current number of sites.
+    size_t n = psci->getNumberOfSites();
+    while (SiteTools::hasGap(psci->getSite(n - 1)))
+    {
+      psci->deleteSite(n - 1);
+      --n;
+    }
+  }
+  catch (...)
+  {
+    delete psci;
+    throw;
+  }
+  return psci;
+}
+
+/******************************************************************************/
+
+// Build a container from the site selection `setName` annotated in the Mase
+// comments of psc.
+//
+// phase: when true, the first (phase - 1) sites of the selection are dropped,
+//        where phase is the value returned by MaseTools::getPhase.
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc; the general comments of the result are deleted.
+//
+// Returns a newly allocated container owned by the caller. The phase is now
+// looked up once instead of on every loop iteration, and the intermediate
+// containers are released if an exception is raised.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getSelectedSites(const PolymorphismSequenceContainer& psc, const std::string& setName, bool phase)
+{
+  SiteContainer* pscc = MaseTools::getSelectedSites(psc, setName);
+  PolymorphismSequenceContainer* psci = 0;
+  try
+  {
+    if (phase)
+    {
+      Comments maseFileHeader = psc.getGeneralComments();
+      const size_t start = MaseTools::getPhase(maseFileHeader, setName);
+      for (size_t i = 1; i < start; i++)
+      {
+        pscc->deleteSite(0);
+      }
+    }
+    psci = new PolymorphismSequenceContainer(*pscc);
+    for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
+    {
+      if (psc.isIngroupMember(i))
+        psci->setAsIngroupMember(i);
+      else
+      {
+        psci->setAsOutgroupMember(i);
+        psci->setGroupId(i, psc.getGroupId(i));
+      }
+    }
+    psci->deleteGeneralComments();
+  }
+  catch (...)
+  {
+    delete psci;
+    delete pscc;
+    throw;
+  }
+  delete pscc;
+  return psci;
+}
+
+/******************************************************************************/
+
+// Build a container holding every site of psc that is NOT part of the site
+// selection `setName` found in the Mase comments of psc.
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc. Returns a newly allocated container owned by the caller.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getNonCodingSites(const PolymorphismSequenceContainer& psc, const std::string& setName)
+{
+  Comments header = psc.getGeneralComments();
+  SiteSelection coding = MaseTools::getSiteSet(header, setName);
+
+  // Keep the positions absent from the coding selection.
+  SiteSelection nonCoding;
+  for (size_t site = 0; site < psc.getNumberOfSites(); ++site)
+  {
+    const bool isCoding = find(coding.begin(), coding.end(), site) != coding.end();
+    if (isCoding)
+      continue;
+    nonCoding.push_back(site);
+  }
+
+  const SiteContainer* selected = SiteContainerTools::getSelectedSites(psc, nonCoding);
+  PolymorphismSequenceContainer* result = new PolymorphismSequenceContainer(*selected);
+  for (size_t s = 0; s < psc.getNumberOfSequences(); ++s)
+  {
+    if (!psc.isIngroupMember(s))
+    {
+      result->setAsOutgroupMember(s);
+      result->setGroupId(s, psc.getGroupId(s));
+    }
+    else
+    {
+      result->setAsIngroupMember(s);
+    }
+  }
+  delete selected;
+  return result;
+}
+
+/******************************************************************************/
+
+// Build a container with the sites of psc located at one codon position.
+//
+// setName: name of the CDS site selection whose phase is read from the Mase
+//          comments; if the phase cannot be read, a phase of 1 is assumed.
+// pos:     codon position to keep.
+// Starting from the first matching index, every third site is kept.
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc. Returns a newly allocated container owned by the caller.
+//
+// The group id is now set on sequence j; it used to be set at index i, the
+// site cursor left over from the selection loop.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getOnePosition(const PolymorphismSequenceContainer& psc, const std::string& setName, size_t pos)
+{
+  Comments maseFileHeader = psc.getGeneralComments();
+  size_t start;
+  try
+  {
+    start = MaseTools::getPhase(maseFileHeader, setName);
+  }
+  catch (Exception&)
+  {
+    // No phase information available: fall back to phase 1.
+    start = 1;
+  }
+  SiteSelection ss;
+  // First site index to keep; when pos precedes the phase, shift by one codon.
+  size_t i;
+  if (pos >= start)
+    i = pos - start;
+  else
+    i = pos + 3 - start;
+  while (i < psc.getNumberOfSites())
+  {
+    ss.push_back(i);
+    i += 3;
+  }
+  const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
+  PolymorphismSequenceContainer* newpsc = new PolymorphismSequenceContainer(*sc);
+  for (size_t j = 0; j < psc.getNumberOfSequences(); j++)
+  {
+    if (psc.isIngroupMember(j))
+      newpsc->setAsIngroupMember(j);
+    else
+    {
+      newpsc->setAsOutgroupMember(j);
+      newpsc->setGroupId(j, psc.getGroupId(j));
+    }
+  }
+  delete sc;
+  return newpsc;
+}
+
+/******************************************************************************/
+
+// Build a container with the sites of psc that are not in the CDS selection
+// `setName`, restricted to the region between the start codon and the stop
+// codon when these are detected on the first sequence.
+//
+// ca: codon alphabet used to recognise a stop codon.
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc. Returns a newly allocated container owned by the caller.
+//
+// Exceptions raised while reading the phase propagate with their original
+// type (the former catch block re-threw a sliced copy). Codon checks are
+// skipped when the selection holds fewer than 3 sites, instead of indexing
+// outside the selection.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca )
+{
+  Comments maseFileHeader = psc.getGeneralComments();
+  SiteSelection ss;
+  SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
+  const size_t start = MaseTools::getPhase(maseFileHeader, setName);
+  const bool hasFullCodon = codss.size() >= 3;
+
+  size_t first = 0, last = psc.getNumberOfSites();
+  // Check if the first codon is AUG
+  if (hasFullCodon &&
+      start == 1 &&
+      psc.getSite(codss[0]).getValue(0) == 0 &&
+      psc.getSite(codss[1]).getValue(0) == 3 &&
+      psc.getSite(codss[2]).getValue(0) == 2)
+    first = codss[0];
+  // Check if the last codon is a STOP one
+  if (hasFullCodon)
+  {
+    int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0);
+    int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0);
+    int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0);
+    if (ca->isStop(ca->getCodon(c1, c2, c3)))
+      last = codss[codss.size() - 1];
+  }
+  // Keep sites between AUG and STOP
+  for (size_t i = first; i < last; i++)
+  {
+    if (find(codss.begin(), codss.end(), i) == codss.end())
+    {
+      ss.push_back(i);
+    }
+  }
+  const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
+  PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc);
+  for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
+  {
+    if (psc.isIngroupMember(i))
+      psci->setAsIngroupMember(i);
+    else
+    {
+      psci->setAsOutgroupMember(i);
+      psci->setGroupId(i, psc.getGroupId(i));
+    }
+  }
+  delete sc;
+  return psci;
+}
+
+/******************************************************************************/
+
+// Build a container with the sites of psc located before the start codon of
+// the CDS selection `setName`.
+//
+// The start codon is only looked for when the phase is 1 and the selection
+// holds at least 3 sites (the size check prevents indexing outside the
+// selection); when it is not detected on the first sequence, no site is
+// selected.
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc. Returns a newly allocated container owned by the caller.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get5Prime(const PolymorphismSequenceContainer& psc, const std::string& setName)
+{
+  Comments maseFileHeader = psc.getGeneralComments();
+  SiteSelection ss;
+  SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
+  size_t start = MaseTools::getPhase(maseFileHeader, setName);
+  size_t last = 0;
+  // Check if the first Codon is AUG
+  if (start == 1 &&
+      codss.size() >= 3 &&
+      psc.getSite(codss[0]).getValue(0) == 0 &&
+      psc.getSite(codss[1]).getValue(0) == 3 &&
+      psc.getSite(codss[2]).getValue(0) == 2)
+    last = codss[0];
+  for (size_t i = 0; i < last; i++)
+  {
+    if (find(codss.begin(), codss.end(), i) == codss.end())
+    {
+      ss.push_back(i);
+    }
+  }
+  const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
+  PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc);
+  for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
+  {
+    if (psc.isIngroupMember(i))
+      psci->setAsIngroupMember(i);
+    else
+    {
+      psci->setAsOutgroupMember(i);
+      psci->setGroupId(i, psc.getGroupId(i));
+    }
+  }
+  delete sc;
+  return psci;
+}
+
+/******************************************************************************/
+
+// Build a container with the sites of psc located after the stop codon of the
+// CDS selection `setName`.
+//
+// ca: codon alphabet used to recognise a stop codon on the first sequence.
+// When no stop codon is detected, scanning starts at the last site of psc.
+// The stop-codon check is skipped when the selection holds fewer than 3 sites
+// (size() - 3 would otherwise wrap around and index outside the selection).
+// The ingroup/outgroup status (and group id of outgroup members) is copied
+// from psc. Returns a newly allocated container owned by the caller.
+PolymorphismSequenceContainer* PolymorphismSequenceContainerTools::get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca )
+{
+  Comments maseFileHeader = psc.getGeneralComments();
+  SiteSelection ss;
+  SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
+  size_t first = psc.getNumberOfSites() - 1;
+  // Check if the last codon is a STOP one
+  if (codss.size() >= 3)
+  {
+    int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0);
+    int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0);
+    int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0);
+    if (ca->isStop(ca->getCodon(c1, c2, c3)))
+      first = codss[codss.size() - 1];
+  }
+  for (size_t i = first; i < psc.getNumberOfSites(); i++)
+  {
+    if (find(codss.begin(), codss.end(), i) == codss.end())
+    {
+      ss.push_back(i);
+    }
+  }
+  const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
+  PolymorphismSequenceContainer* psci = new PolymorphismSequenceContainer(*sc);
+  for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
+  {
+    if (psc.isIngroupMember(i))
+      psci->setAsIngroupMember(i);
+    else
+    {
+      psci->setAsOutgroupMember(i);
+      psci->setGroupId(i, psc.getGroupId(i));
+    }
+  }
+  delete sc;
+  return psci;
+}
+
+/******************************************************************************/
+
+// Return the species name of the ingroup, built from the name of the Mase
+// sequence selection that starts with "INGROUP": the selection name is split
+// on '_' and tokens 1 and 2 are joined with a space.
+// Returns an empty string when psc has no general comments or no such
+// selection; if several selections match, the last one visited wins.
+//
+// The tokenizer is a stack object, so it is released on every path (it used
+// to be heap-allocated and never deleted).
+string PolymorphismSequenceContainerTools::getIngroupSpeciesName(const PolymorphismSequenceContainer& psc)
+{
+  string speciesName;
+  Comments maseFileHeader = psc.getGeneralComments();
+  if (!maseFileHeader.size())
+    return speciesName;
+  map<string, size_t> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
+  for (map<string, size_t>::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++)
+  {
+    const string& key = mi->first;
+    if (key.compare(0, 7, "INGROUP") == 0)
+    {
+      StringTokenizer sptk(key, "_");
+      speciesName = sptk.getToken(1) + " " + sptk.getToken(2);
+    }
+  }
+  return speciesName;
+}
+
+/******************************************************************************/
diff --git a/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h
new file mode 100644
index 0000000..993f1bc
--- /dev/null
+++ b/src/Bpp/PopGen/PolymorphismSequenceContainerTools.h
@@ -0,0 +1,255 @@
+//
+// File: PolymorphismSequenceContainerTools.h
+// Authors: Eric Bazin
+// Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
+#define _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
+
+#include <Bpp/Numeric/Random/RandomTools.h>
+#include <Bpp/Text/StringTokenizer.h>
+
+// from SeqLib
+#include <Bpp/Seq/Alphabet/CodonAlphabet.h>
+#include <Bpp/Seq/Io/Mase.h>
+#include <Bpp/Seq/Io/MaseTools.h>
+#include <Bpp/Seq/Container/SiteContainerIterator.h>
+#include <Bpp/Seq/Container/SequenceContainerTools.h>
+#include <Bpp/Seq/Container/VectorSiteContainer.h>
+#include <Bpp/Seq/SiteTools.h>
+
+// from STL
+#include <string>
+
+// From Local
+#include "PolymorphismSequenceContainer.h"
+#include "GeneralExceptions.h"
+
+namespace bpp
+{
+/**
+ * @brief Utility functions to manipulate PolymorphismSequenceContainer objects.
+ *
+ * All methods are static. Methods returning a PolymorphismSequenceContainer
+ * pointer return a newly allocated object that the caller must delete.
+ *
+ * @author Sylvain Gaillard
+ */
+
+class PolymorphismSequenceContainerTools
+{
+public:
+ // Class destructor:
+ ~PolymorphismSequenceContainerTools();
+
+ /*******************************************************************************/
+
+public:
+ /**
+ * @brief Read a Mase+ file and return a PolymorphismSequenceContainer. Sequences listed
+ * in a selection whose tag begins with OUTGROUP are flagged as outgroup (see Polymorphix).
+ *
+ * @param path Path to the Mase+ file
+ * @param alpha Sequence Alphabet
+ * @return A newly allocated container, owned by the caller.
+ *
+ * @throw Exception if the file is not in the specified format
+ */
+ static PolymorphismSequenceContainer* read(const std::string& path, const Alphabet* alpha) throw (Exception);
+
+ /**
+ * @brief Extract ingroup sequences from a PolymorphismSequenceContainer and create a new one.
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @return A newly allocated container, owned by the caller.
+ *
+ * @throw Exception if there is no ingroup sequence
+ */
+ static PolymorphismSequenceContainer* extractIngroup (const PolymorphismSequenceContainer& psc) throw (Exception);
+
+ /**
+ * @brief Extract outgroup sequences from a PolymorphismSequenceContainer and create a new one.
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @return A newly allocated container, owned by the caller.
+ *
+ * @throw Exception if there is no outgroup sequence
+ */
+ static PolymorphismSequenceContainer* extractOutgroup (const PolymorphismSequenceContainer& psc) throw (Exception);
+
+ /**
+ * @brief Extract a special group from the PolymorphismSequenceContainer.
+ *
+ * @param psc a PolymorphismSequenceContainer reference.
+ * @param group_id the group identifier as a size_t.
+ * @return A newly allocated container, owned by the caller.
+ *
+ * @throw GroupNotFoundException if group_id is not found.
+ */
+ static PolymorphismSequenceContainer* extractGroup(const PolymorphismSequenceContainer& psc, size_t group_id) throw (Exception);
+
+ /**
+ * @brief Extract selected sequences
+ *
+ * @param psc a PolymorphismSequenceContainer reference.
+ * @param ss a sequence selection.
+ * @return A newly allocated container, owned by the caller.
+ *
+ */
+ static PolymorphismSequenceContainer* getSelectedSequences(const PolymorphismSequenceContainer& psc, const SequenceSelection& ss);
+
+
+ /**
+ * @brief Get a random set of sequences
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @param n the number of sequences to get
+ * @param replace a boolean flag true for sampling with replacement
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* sample(const PolymorphismSequenceContainer& psc, size_t n, bool replace = true);
+
+ /**
+ * @brief Retrieves sites without gaps from PolymorphismSequenceContainer.
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getSitesWithoutGaps (const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Return number of sites without gaps in a PolymorphismSequenceContainer.
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @param ingroup a boolean set to true if you want to take only ingroup sequences into account
+ *
+ * @throw Exception if there is no ingroup sequence
+ */
+ static size_t getNumberOfNonGapSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception);
+
+ /**
+ * @brief Return number of completely resolved sites in a PolymorphismSequenceContainer.
+ *
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @param ingroup a boolean set to true if you want to take only ingroup sequences into account
+ *
+ * @throw Exception if there is no ingroup sequence
+ */
+ static size_t getNumberOfCompleteSites(const PolymorphismSequenceContainer& psc, bool ingroup) throw (Exception);
+
+
+ /**
+ * @brief Retrieves complete sites from a PolymorphismSequenceContainer.
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getCompleteSites(const PolymorphismSequenceContainer& psc);
+
+
+ /**
+ * @brief Exclude flanking sites with gaps but keep gap sites within the alignment
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* excludeFlankingGap(const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Get a PolymorphismSequenceContainer corresponding to a site selection annotated in the mase comments
+ *
+ * Be careful: in the new PolymorphismSequenceContainer the mase comments are lost.
+ * Information about CDS positions and start codon is no longer available.
+ *
+ * @param psc a PolymorphismSequenceContainer.
+ * @param setName The name of the set to retrieve.
+ * @param phase a boolean set to true if you want to take the phase into account during the extraction. It removes the useless sites.
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getSelectedSites(const PolymorphismSequenceContainer& psc, const std::string& setName, bool phase);
+
+ /**
+ * @brief Retrieve non-coding sites defined in the mase file header
+ *
+ * Be careful: to be used before excluding gaps
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @param setName name of the CDS site selection
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getNonCodingSites(const PolymorphismSequenceContainer& psc, const std::string& setName);
+
+ /**
+ * @brief Retrieve sites at one codon position (1,2,3)
+ *
+ * Be careful: to be used before excluding gaps
+ * Be careful: if there is no phase information, the method catches the exception and sets the phase to 1.
+ * This allows this method to be used on a PolymorphismSequenceContainer generated by getSelectedSequences
+ *
+ * @param psc a PolymorphismSequenceContainer reference
+ * @param setName name of the CDS site selection
+ * @param pos position index.
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getOnePosition(const PolymorphismSequenceContainer& psc, const std::string& setName, size_t pos);
+
+ /**
+ * @brief Retrieve intron sites
+ *
+ * Same as getNonCodingSites but excludes the 5' and 3' flanking regions if there are any
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param setName name of the CDS site selection
+ * @param ca a codon alphabet
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* getIntrons(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca );
+
+ /**
+ * @brief Retrieve 5' sites
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param setName name of the CDS site selection
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* get5Prime(const PolymorphismSequenceContainer& psc, const std::string& setName);
+
+ /**
+ * @brief Retrieve 3' sites
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param setName name of the CDS site selection
+ * @param ca a codon alphabet
+ * @return A newly allocated container, owned by the caller.
+ */
+ static PolymorphismSequenceContainer* get3Prime(const PolymorphismSequenceContainer& psc, const std::string& setName, const CodonAlphabet* ca );
+
+ /**
+ * @brief Get the species name of the ingroup
+ *
+ * @param psc a PolymorphismSequenceContainer.
+ * @return The species name, or an empty string if none is found in the mase comments.
+ */
+ static std::string getIngroupSpeciesName(const PolymorphismSequenceContainer& psc);
+};
+} // end of namespace bpp;
+
+#endif // _POLYMORPHISMSEQUENCECONTAINERTOOL_H_
+
diff --git a/src/Bpp/PopGen/PopgenlibIO.cpp b/src/Bpp/PopGen/PopgenlibIO.cpp
new file mode 100644
index 0000000..d0c5b9d
--- /dev/null
+++ b/src/Bpp/PopGen/PopgenlibIO.cpp
@@ -0,0 +1,713 @@
+//
+// File PopgenlibIO.cpp
+// Created by: Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "PopgenlibIO.h"
+
+using namespace bpp;
+using namespace std;
+
+// Symbolic separator names understood by setDataSeparator() and
+// returned by getDataSeparator().
+const std::string PopgenlibIO::WHITESPACE("WHITESPACE");
+const std::string PopgenlibIO::TAB("TAB");
+const std::string PopgenlibIO::COMA("COMA");
+const std::string PopgenlibIO::SEMICOLON("SEMICOLON");
+
+// Ploidy keywords read from / written to the "Ploidy = ..." lines of [Loci].
+const std::string PopgenlibIO::DIPLOID("DIPLOID");
+const std::string PopgenlibIO::HAPLOID("HAPLOID");
+const std::string PopgenlibIO::HAPLODIPLOID("HAPLODIPLOID");
+const std::string PopgenlibIO::UNKNOWN("UNKNOWN");
+
+// Default format: values separated by a blank, '$' marks missing data.
+PopgenlibIO::PopgenlibIO() :
+  data_separator_(' '),
+  missing_data_symbol_('$')
+{}
+
+/**
+ * Build an I/O object with user-chosen symbols.
+ *
+ * Both members start from the defaults (' ' and '$') and are then replaced
+ * through the validating setters, separator first, so that each value is
+ * checked against the other one.
+ *
+ * @param missing_data_symbol Single character coding missing data.
+ * @param data_separator      Separator code ("WHITESPACE", "TAB", "COMA",
+ *                            "SEMICOLON") or a single character.
+ * @throw Exception if one of the setters rejects its argument.
+ */
+PopgenlibIO::PopgenlibIO(const std::string& missing_data_symbol,
+                         const std::string& data_separator)
+throw (Exception) :
+  data_separator_(' '),
+  missing_data_symbol_('$')
+{
+  // Exceptions raised by the setters simply propagate. The former
+  // "catch (Exception& e) { throw e; }" re-threw a copy of static type
+  // Exception, which sliced any derived exception type.
+  setDataSeparator(data_separator);
+  setMissingDataSymbol(missing_data_symbol);
+}
+
+// Nothing to release here: the class itself only stores two characters.
+PopgenlibIO::~PopgenlibIO()
+{}
+
+/**
+ * Set the character that codes missing data.
+ *
+ * @param missing_data_symbol Must hold exactly one character that is not a
+ *        digit, not a white space character and not the current data separator.
+ * @throw Exception if the argument does not satisfy these rules.
+ */
+void PopgenlibIO::setMissingDataSymbol(const std::string& missing_data_symbol) throw (Exception)
+{
+  // Check the length first so that the character access below is always valid.
+  if (missing_data_symbol.size() != 1)
+    throw Exception("PopgenlibIO::setMissingData: not expected value for missing_data_symbol.");
+
+  const char symbol = missing_data_symbol[0];
+  // isdigit() is undefined for negative values other than EOF, so the
+  // (possibly signed) char is converted to unsigned char before the call.
+  if (isdigit(static_cast<unsigned char>(symbol))
+      || TextTools::isWhiteSpaceCharacter(symbol)
+      || symbol == data_separator_)
+    throw Exception("PopgenlibIO::setMissingData: not expected value for missing_data_symbol.");
+
+  missing_data_symbol_ = symbol;
+}
+
+/**
+ * Set the character separating values on a data line.
+ *
+ * @param data_separator One of the codes WHITESPACE, TAB, COMA, SEMICOLON, or
+ *        a string made of exactly one character that is neither a digit nor
+ *        the current missing data symbol.
+ * @throw Exception if the argument is empty, longer than one character
+ *        (and not one of the codes), a digit, or the missing data symbol.
+ */
+void PopgenlibIO::setDataSeparator(const std::string& data_separator) throw (Exception)
+{
+  if (data_separator == WHITESPACE)
+    data_separator_ = ' ';
+  else if (data_separator == TAB)
+    data_separator_ = '\t';
+  else if (data_separator == COMA)
+    data_separator_ = ',';
+  else if (data_separator == SEMICOLON)
+    data_separator_ = ';';
+  else
+  {
+    // Anything else has to be a single literal character. Previously an
+    // empty string silently selected '\0' and a longer string was truncated
+    // to its first character.
+    // The unsigned char cast keeps isdigit() defined for non-ASCII bytes.
+    if (data_separator.size() != 1
+        || isdigit(static_cast<unsigned char>(data_separator[0]))
+        || data_separator == getMissingDataSymbol())
+      throw Exception("PopgenlibIO::setDataSeparator: not expected value for data_separator.");
+    data_separator_ = data_separator[0];
+  }
+}
+
+// Missing data symbol converted to a string by TextTools::toString().
+std::string PopgenlibIO::getMissingDataSymbol() const
+{
+  const std::string symbol = TextTools::toString(missing_data_symbol_);
+  return symbol;
+}
+
+// Data separator as text: the symbolic code for blank, tabulation, ',' and
+// ';', otherwise the character itself converted by TextTools::toString().
+std::string PopgenlibIO::getDataSeparator() const
+{
+  if (data_separator_ == ' ')
+    return WHITESPACE;
+  if (data_separator_ == '\t')
+    return TAB;
+  if (data_separator_ == ',')
+    return COMA;
+  if (data_separator_ == ';')
+    return SEMICOLON;
+  return TextTools::toString(data_separator_);
+}
+
+// Raw character currently used to code missing data.
+char PopgenlibIO::getMissingDataChar() const
+{
+  const char symbol = missing_data_symbol_;
+  return symbol;
+}
+
+// Raw character currently used to separate values.
+char PopgenlibIO::getDataSeparatorChar() const
+{
+  const char separator = data_separator_;
+  return separator;
+}
+
+/**
+ * Read a data set from a stream.
+ *
+ * The stream is split into the sections [General], [Localities],
+ * [Sequences], [Loci] and [Individuals]. Lines of a section are buffered in
+ * temp_v and handed to the matching parse*_ method either when a new entry
+ * starts (a line containing '>'), on the first data line of the following
+ * section, or at the end of the stream. The lines of section N are only
+ * buffered when the previously seen section number is lower than N, so the
+ * sections are expected in the order listed above.
+ *
+ * @param is       Input stream.
+ * @param data_set Data set filled by the parse*_ methods.
+ * @throw IOException if the stream is not readable on entry.
+ * @throw Exception if sequence lines have to be parsed although no sequence
+ *        container could be created (data_set reported no sequence data after
+ *        the [General] section was parsed).
+ */
+void PopgenlibIO::read(std::istream& is, DataSet& data_set) throw (Exception)
+{
+  if (!is)
+    throw IOException("PopgenlibIO::read: fail to open stream.");
+
+  // Owns the scratch container receiving the [Sequences] section, so that it
+  // is released on every exit path (it used to be leaked).
+  // NOTE(review): this relies on parseIndividual_ copying the sequences it
+  // takes from the container - confirm against Individual::addSequence.
+  struct ContainerGuard
+  {
+    VectorSequenceContainer* ptr;
+    ContainerGuard() : ptr(0) {}
+    ~ContainerGuard() { delete ptr; }
+  };
+  ContainerGuard tmp_vsc;
+  const string no_alphabet_msg = "PopgenlibIO::read: sequences found but no sequence container is available (missing SequenceType in [General]?).";
+
+  string temp = "";
+  vector<string> temp_v;
+  vector<LocusInfo> tmp_locinf;
+  bool section1 = true;
+  bool section2 = true;
+  bool section3 = true;
+  bool section4 = true;
+  bool section5 = true;
+  size_t current_section = 0;
+  size_t previous_section = 0;
+  // Main loop for all file lines
+  while (!is.eof())
+  {
+    temp = FileTools::getNextLine(is);
+    // A section header only switches the current section.
+    size_t header = 0;
+    if (temp.find("[General]", 0) != string::npos)
+      header = 1;
+    else if (temp.find("[Localities]", 0) != string::npos)
+      header = 2;
+    else if (temp.find("[Sequences]", 0) != string::npos)
+      header = 3;
+    else if (temp.find("[Loci]", 0) != string::npos)
+      header = 4;
+    else if (temp.find("[Individuals]", 0) != string::npos)
+      header = 5;
+    if (header != 0)
+    {
+      previous_section = current_section;
+      current_section = header;
+      continue;
+    }
+
+    // General section ------------------------------------
+    if (current_section == 1 && previous_section < 1)
+    {
+      temp_v.push_back(temp);
+    }
+    if (section1 && current_section != 1 && previous_section == 1)
+    {
+      section1 = false;
+      parseGeneral_(temp_v, data_set);
+      temp_v.clear();
+      if (data_set.hasSequenceData() && tmp_vsc.ptr == 0)
+        tmp_vsc.ptr = new VectorSequenceContainer(data_set.getAlphabet());
+    }
+
+    // Localities section ---------------------------------
+    if (current_section == 2 && previous_section < 2)
+    {
+      // A '>' line starts a new locality: flush the previous one first.
+      if (temp.find(">", 0) != string::npos)
+      {
+        parseLocality_(temp_v, data_set);
+        temp_v.clear();
+      }
+      temp_v.push_back(temp);
+    }
+    if (section2 && current_section != 2 && previous_section == 2)
+    {
+      section2 = false;
+      parseLocality_(temp_v, data_set);
+      temp_v.clear();
+    }
+
+    // Sequences section ----------------------------------
+    if (current_section == 3 && previous_section < 3)
+    {
+      if (temp.find(">", 0) != string::npos)
+      {
+        // Report a missing container instead of dereferencing a null pointer.
+        if (tmp_vsc.ptr != 0)
+          parseSequence_(temp_v, *tmp_vsc.ptr);
+        else if (!temp_v.empty())
+          throw Exception(no_alphabet_msg);
+        temp_v.clear();
+      }
+      temp_v.push_back(temp);
+    }
+    if (section3 && current_section != 3 && previous_section == 3)
+    {
+      section3 = false;
+      if (tmp_vsc.ptr != 0)
+        parseSequence_(temp_v, *tmp_vsc.ptr);
+      else if (!temp_v.empty())
+        throw Exception(no_alphabet_msg);
+      temp_v.clear();
+    }
+
+    // Loci section ---------------------------------------
+    if (current_section == 4 && previous_section < 4)
+    {
+      if (temp.find(">", 0) != string::npos)
+      {
+        parseLoci_(temp_v, tmp_locinf);
+        temp_v.clear();
+      }
+      temp_v.push_back(temp);
+    }
+    if (section4 && current_section != 4 && previous_section == 4)
+    {
+      section4 = false;
+      parseLoci_(temp_v, tmp_locinf);
+      temp_v.clear();
+      AnalyzedLoci tmp_anloc(tmp_locinf.size());
+      for (size_t i = 0; i < tmp_locinf.size(); i++)
+      {
+        tmp_anloc.setLocusInfo(i, tmp_locinf[i]);
+      }
+      data_set.setAnalyzedLoci(tmp_anloc);
+    }
+
+    // Individuals section --------------------------------
+    // NOTE(review): when no sequence container exists, a null pointer is
+    // still bound to the reference parameter here, as before. Fixing it needs
+    // either an alphabet or a pointer parameter on parseIndividual_.
+    if (current_section == 5 && previous_section < 5)
+    {
+      if (temp.find(">", 0) != string::npos)
+      {
+        parseIndividual_(temp_v, data_set, *tmp_vsc.ptr);
+        temp_v.clear();
+      }
+      temp_v.push_back(temp);
+    }
+    if (section5 && current_section != 5 && previous_section == 5)
+    {
+      section5 = false;
+      parseIndividual_(temp_v, data_set, *tmp_vsc.ptr);
+      temp_v.clear();
+    }
+  }
+
+  // End of stream: flush whatever the last section left in the buffer.
+  // [General] and [Loci] used to be dropped when they were the last section.
+  if (section1 && current_section == 1)
+    parseGeneral_(temp_v, data_set);
+  if (section2 && current_section == 2)
+    parseLocality_(temp_v, data_set);
+  if (section3 && current_section == 3)
+  {
+    if (tmp_vsc.ptr != 0)
+      parseSequence_(temp_v, *tmp_vsc.ptr);
+    else if (!temp_v.empty())
+      throw Exception(no_alphabet_msg);
+  }
+  if (section4 && current_section == 4)
+  {
+    parseLoci_(temp_v, tmp_locinf);
+    AnalyzedLoci tmp_anloc(tmp_locinf.size());
+    for (size_t i = 0; i < tmp_locinf.size(); i++)
+    {
+      tmp_anloc.setLocusInfo(i, tmp_locinf[i]);
+    }
+    data_set.setAnalyzedLoci(tmp_anloc);
+  }
+  if (section5 && current_section == 5)
+    parseIndividual_(temp_v, data_set, *tmp_vsc.ptr);
+  temp_v.clear();
+}
+
+/**
+ * Apply the settings of the [General] section.
+ *
+ * Every buffered line is searched for the keys "MissingData",
+ * "DataSeparator" and "SequenceType"; the first value found after '=' is
+ * passed to setMissingDataSymbol(), setDataSeparator() and
+ * data_set.setAlphabet() respectively.
+ *
+ * @param in       Lines of the section.
+ * @param data_set Data set receiving the alphabet.
+ */
+void PopgenlibIO::parseGeneral_(const std::vector<std::string>& in, DataSet& data_set)
+{
+  stringstream buffer;
+  for (vector<string>::const_iterator it = in.begin(); it != in.end(); ++it)
+  {
+    buffer << *it << endl;
+  }
+  // No line at all: nothing to configure.
+  if (in.empty())
+    return;
+
+  string line;
+  while (!buffer.eof())
+  {
+    line = FileTools::getNextLine(buffer);
+    // getValues_() rewrites "line" in place, so the later key tests see the
+    // extracted value whenever an earlier key matched.
+    if (line.find("MissingData", 0) != string::npos)
+    {
+      setMissingDataSymbol(getValues_(line, "=")[0]);
+    }
+    if (line.find("DataSeparator", 0) != string::npos)
+    {
+      setDataSeparator(getValues_(line, "=")[0]);
+    }
+    if (line.find("SequenceType", 0) != string::npos)
+    {
+      data_set.setAlphabet(getValues_(line, "=")[0]);
+    }
+  }
+}
+
+/**
+ * Build one locality from its buffered lines and add it to the data set.
+ *
+ * A line containing '>' gives the name (everything after the first
+ * character, surrounding white space removed). A line containing "Coord"
+ * gives X and Y after '='. Nothing is added when no name was found.
+ *
+ * @param in       Lines describing the locality.
+ * @param data_set Data set receiving the locality.
+ * @throw Exception if a Coord line holds fewer than two values.
+ */
+void PopgenlibIO::parseLocality_(const std::vector<std::string>& in, DataSet& data_set)
+{
+  stringstream is;
+  for (size_t i = 0; i < in.size(); i++)
+  {
+    is << in[i] << endl;
+  }
+  Locality<double> tmp_locality("");
+  string temp;
+  while (!is.eof() && in.size() != 0)
+  {
+    temp = FileTools::getNextLine(is);
+    if (temp.find(">", 0) != string::npos)
+    {
+      tmp_locality.setName(TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end())));
+    }
+    if (temp.find("Coord", 0) != string::npos)
+    {
+      const vector<string> v = getValues_(temp, "=");
+      // v[1] used to be read unconditionally, which is out of bounds when
+      // the line holds a single value.
+      if (v.size() < 2)
+        throw Exception("PopgenlibIO::parseLocality_: Coord needs two values (X and Y).");
+      tmp_locality.setX(TextTools::toDouble(v[0]));
+      tmp_locality.setY(TextTools::toDouble(v[1]));
+    }
+  }
+  if (tmp_locality.getName() != "")
+    data_set.addLocality(tmp_locality);
+}
+
+/**
+ * Hand the buffered lines to a Fasta reader.
+ *
+ * @param in  Lines buffered from the [Sequences] section.
+ * @param vsc Container passed to Fasta::readSequences().
+ */
+void PopgenlibIO::parseSequence_(const std::vector<std::string>& in, VectorSequenceContainer& vsc)
+{
+  Fasta reader;
+  stringstream fasta_text;
+  for (vector<string>::const_iterator line = in.begin(); line != in.end(); ++line)
+  {
+    fasta_text << *line << endl;
+  }
+  reader.readSequences(fasta_text, vsc);
+}
+
+/**
+ * Build one LocusInfo from its buffered lines and append it to locus_info.
+ *
+ * A line containing '>' gives the name (text after the first character).
+ * A line containing "Ploidy" gives the ploidy keyword after '=', compared in
+ * upper case; an unrecognised keyword keeps the previous value (DIPLOID by
+ * default). "NbAlleles" lines are not used. Nothing is appended when no name
+ * was found.
+ *
+ * @param in         Lines describing the locus.
+ * @param locus_info Vector receiving the new LocusInfo.
+ */
+void PopgenlibIO::parseLoci_(const std::vector<std::string>& in, std::vector<LocusInfo>& locus_info)
+{
+  stringstream buffer;
+  for (vector<string>::const_iterator it = in.begin(); it != in.end(); ++it)
+  {
+    buffer << *it << endl;
+  }
+
+  string name = "";
+  unsigned int ploidy = LocusInfo::DIPLOID;
+  string line;
+  while (!buffer.eof())
+  {
+    line = FileTools::getNextLine(buffer);
+    if (line.find(">", 0) != string::npos)
+      name = TextTools::removeSurroundingWhiteSpaces(string(line.begin() + 1, line.end()));
+
+    if (line.find("Ploidy", 0) == string::npos)
+      continue;
+
+    vector<string> values = getValues_(line, "=");
+    const string keyword = TextTools::toUpper(TextTools::removeSurroundingWhiteSpaces(values[0]));
+    if (keyword == DIPLOID)
+      ploidy = LocusInfo::DIPLOID;
+    else if (keyword == HAPLOID)
+      ploidy = LocusInfo::HAPLOID;
+    else if (keyword == HAPLODIPLOID)
+      ploidy = LocusInfo::HAPLODIPLOID;
+    else if (keyword == UNKNOWN)
+      ploidy = LocusInfo::UNKNOWN;
+  }
+
+  if (name != "")
+    locus_info.push_back(LocusInfo(name, ploidy));
+}
+
+/**
+ * Build one individual from its buffered lines and add it to its group.
+ *
+ * Recognised lines, tested in this order on every line:
+ * '>' (identifier), "Group", "Locality", "Coord", "Date", "SequenceData"
+ * (followed by one data line) and "AllelicData" (followed by two data lines).
+ * Failures reported by the data set or the individual while storing a value
+ * are deliberately ignored, as before. Structural problems that would read
+ * outside of the buffered lines or values now raise an Exception.
+ *
+ * @param in       Lines describing the individual.
+ * @param data_set Data set receiving groups, allele infos and the individual.
+ * @param vsc      Container in which "SequenceData" indices (1-based) are looked up.
+ * @throw Exception if a Coord line holds fewer than two values, a Date value
+ *        is shorter than four characters, or the data lines announced by
+ *        SequenceData / AllelicData are missing.
+ */
+void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& data_set, const VectorSequenceContainer& vsc)
+{
+  Individual tmp_indiv;
+  size_t tmp_group_pos = 0;
+  string temp = "";
+  for (size_t i = 0; i < in.size(); i++)
+  {
+    // Get Individual Id
+    if (in[i].find(">", 0) != string::npos)
+    {
+      tmp_indiv.setId(TextTools::removeSurroundingWhiteSpaces(string(in[i].begin() + 1, in[i].end())));
+    }
+    // Get the Group
+    if (in[i].find("Group", 0) != string::npos)
+    {
+      temp = in[i];
+      tmp_group_pos = TextTools::toInt(getValues_(temp, "=")[0]);
+      try
+      {
+        data_set.addEmptyGroup(tmp_group_pos);
+      }
+      catch (...)
+      {}
+    }
+    // Find the locality
+    if (in[i].find("Locality", 0) != string::npos)
+    {
+      temp = in[i];
+      size_t sep_pos = temp.find("=", 0);
+      string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + sep_pos + 1, temp.end()));
+      try
+      {
+        tmp_indiv.setLocality(&data_set.getLocalityByName(loc_name));
+      }
+      catch (...)
+      {}
+    }
+    // Set the coord
+    if (in[i].find("Coord", 0) != string::npos)
+    {
+      temp = in[i];
+      // Parse the line once and make sure both X and Y are present.
+      const vector<string> coord = getValues_(temp, "=");
+      if (coord.size() < 2)
+        throw Exception("PopgenlibIO::parseIndividual_: Coord needs two values (X and Y).");
+      tmp_indiv.setCoord(TextTools::toDouble(coord[0]), TextTools::toDouble(coord[1]));
+    }
+    // And the date
+    if (in[i].find("Date", 0) != string::npos)
+    {
+      temp = in[i];
+      const string tmp_date = getValues_(temp, "=")[0];
+      // Two characters for the day, two for the month, the rest is the year.
+      if (tmp_date.size() < 4)
+        throw Exception("PopgenlibIO::parseIndividual_: Date value is too short.");
+      const int d = TextTools::toInt(tmp_date.substr(0, 2));
+      const int m = TextTools::toInt(tmp_date.substr(2, 2));
+      const int y = TextTools::toInt(tmp_date.substr(4));
+      tmp_indiv.setDate(Date(d, m, y));
+    }
+    // Now the sequences
+    if (in[i].find("SequenceData", 0) != string::npos)
+    {
+      // The indices are on the line following the key.
+      if (i + 1 >= in.size())
+        throw Exception("PopgenlibIO::parseIndividual_: SequenceData is not followed by a data line.");
+      i++;
+      temp = in[i];
+      vector<string> seq_pos_str = getValues_(temp, "");
+      for (size_t j = 0; j < seq_pos_str.size(); j++)
+      {
+        try
+        {
+          if (seq_pos_str[j] != getMissingDataSymbol())
+            tmp_indiv.addSequence(j, vsc.getSequence(TextTools::toInt(seq_pos_str[j]) - 1));
+        }
+        catch (...)
+        {}
+      }
+    }
+    // Finally the loci
+    if (in[i].find("AllelicData", 0) != string::npos)
+    {
+      // The two allele lines follow the key.
+      if (i + 2 >= in.size())
+        throw Exception("PopgenlibIO::parseIndividual_: AllelicData is not followed by two data lines.");
+      string temp1 = in[++i];
+      string temp2 = in[++i];
+      vector<string> allele_pos_str1 = getValues_(temp1, "");
+      vector<string> allele_pos_str2 = getValues_(temp2, "");
+      try
+      {
+        tmp_indiv.initGenotype(data_set.getNumberOfLoci());
+      }
+      catch (...)
+      {}
+      // Lines of different lengths are ignored altogether.
+      if (allele_pos_str1.size() == allele_pos_str2.size())
+      {
+        for (size_t j = 0; j < allele_pos_str1.size(); j++)
+        {
+          const LocusInfo& locus_info = data_set.getLocusInfoAtPosition(j);
+          allele_pos_str1[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str1[j]);
+          vector<string> tmp_alleles_id;
+          if (allele_pos_str1[j] != getMissingDataSymbol())
+          {
+            BasicAlleleInfo tmp_allele_info(allele_pos_str1[j]);
+            try
+            {
+              data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info);
+            }
+            catch (...)
+            {}
+            tmp_alleles_id.push_back(allele_pos_str1[j]);
+          }
+          allele_pos_str2[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str2[j]);
+          if (allele_pos_str2[j] != getMissingDataSymbol())
+          {
+            BasicAlleleInfo tmp_allele_info(allele_pos_str2[j]);
+            try
+            {
+              data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info);
+            }
+            catch (...)
+            {}
+            tmp_alleles_id.push_back(allele_pos_str2[j]);
+          }
+          try
+          {
+            tmp_indiv.setMonolocusGenotypeByAlleleId(j, tmp_alleles_id, locus_info);
+          }
+          catch (...)
+          {}
+        }
+      }
+    }
+  }
+  // An individual without identifier is dropped.
+  if (tmp_indiv.getId() != "")
+  {
+    try
+    {
+      data_set.addIndividualToGroup(data_set.getGroupPosition(tmp_group_pos), tmp_indiv);
+    }
+    catch (...)
+    {}
+  }
+}
+
+// Read from a file path: forwarded unchanged to AbstractIDataSet::read().
+void PopgenlibIO::read(const std::string& path, DataSet& data_set) throw (Exception)
+{
+  this->AbstractIDataSet::read(path, data_set);
+}
+
+// Read from a stream and return the DataSet pointer provided by
+// AbstractIDataSet::read().
+DataSet* PopgenlibIO::read(std::istream& is) throw (Exception)
+{
+  DataSet* result = AbstractIDataSet::read(is);
+  return result;
+}
+
+// Read from a file path and return the DataSet pointer provided by
+// AbstractIDataSet::read().
+DataSet* PopgenlibIO::read(const std::string& path) throw (Exception)
+{
+  DataSet* result = AbstractIDataSet::read(path);
+  return result;
+}
+
+/**
+ * Write a data set to a stream.
+ *
+ * Sections are written in the order [General], [Localities] (if any),
+ * [Sequences] (if any), [Loci] (if any) and [Individuals]. Sequences are
+ * written group by group, individual by individual; the "SequenceData" line
+ * of an individual refers to them through a counter starting at 1 that
+ * follows the same order.
+ *
+ * Coordinates are separated by the data separator character, because the
+ * reading side splits "Coord" values on that character. With the default
+ * white space separator the output is unchanged.
+ *
+ * @param os       Output stream.
+ * @param data_set Data set to write.
+ */
+void PopgenlibIO::write(std::ostream& os, const DataSet& data_set) const throw (Exception)
+{
+  size_t seqcpt = 1;
+  const char separator = getDataSeparatorChar();
+  // General section --------------------------------------
+  os << "[General]" << endl;
+  os << "MissingData = " << getMissingDataSymbol() << endl;
+  os << "DataSeparator = " << getDataSeparator() << endl;
+  if (data_set.hasSequenceData())
+  {
+    string seq_type = data_set.getAlphabetType();
+    os << "SequenceType = " << seq_type << endl;
+  }
+  // Localities section -----------------------------------
+  if (data_set.hasLocality())
+  {
+    os << endl << "[Localities]" << endl;
+    for (size_t i = 0; i < data_set.getNumberOfLocalities(); i++)
+    {
+      os << ">" << (data_set.getLocalityAtPosition(i)).getName() << endl;
+      os << "Coord = " << (data_set.getLocalityAtPosition(i)).getX();
+      os << separator << (data_set.getLocalityAtPosition(i)).getY() << endl;
+    }
+  }
+
+  // Sequences section ------------------------------------
+  if (data_set.hasSequenceData())
+  {
+    Fasta fasta(80);
+    os << endl << "[Sequences]" << endl;
+    for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+    {
+      for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++)
+      {
+        const Individual* seq_ind = data_set.getIndividualAtPositionFromGroup(i, j);
+        // Individuals without sequences have nothing to contribute here and
+        // get no "SequenceData" line below either.
+        if (seq_ind->hasSequences())
+          fasta.writeSequences(os, seq_ind->getSequences());
+      }
+    }
+  }
+
+  // AllelicData section ----------------------------------
+  if (data_set.hasAlleleicData())
+  {
+    os << endl << "[Loci]" << endl;
+    for (size_t i = 0; i < data_set.getNumberOfLoci(); i++)
+    {
+      const LocusInfo& tmp_locus_info = data_set.getLocusInfoAtPosition(i);
+      os << ">" << tmp_locus_info.getName() << endl;
+      os << "Ploidy = ";
+      if (tmp_locus_info.getPloidy() == LocusInfo::HAPLOID)
+        os << HAPLOID;
+      else if (tmp_locus_info.getPloidy() == LocusInfo::DIPLOID)
+        os << DIPLOID;
+      else if (tmp_locus_info.getPloidy() == LocusInfo::HAPLODIPLOID)
+        os << HAPLODIPLOID;
+      else if (tmp_locus_info.getPloidy() == LocusInfo::UNKNOWN)
+        os << UNKNOWN;
+      os << endl;
+      os << "NbAlleles = " << tmp_locus_info.getNumberOfAlleles() << endl;
+    }
+  }
+
+  // Individuals section ----------------------------------
+  os << endl << "[Individuals]" << endl;
+  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
+  {
+    for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++)
+    {
+      // Blank line between two individuals.
+      if (i > 0 || j > 0)
+        os << endl;
+      const Individual* tmp_ind = data_set.getIndividualAtPositionFromGroup(i, j);
+      os << ">" << tmp_ind->getId() << endl;
+      os << "Group = " << TextTools::toString((data_set.getGroupAtPosition(i)).getGroupId()) << endl;
+      if (tmp_ind->hasLocality())
+        os << "Locality = " << tmp_ind->getLocality()->getName() << endl;
+      if (tmp_ind->hasCoord())
+        os << "Coord = " << tmp_ind->getX() << separator << tmp_ind->getY() << endl;
+      if (tmp_ind->hasDate())
+        os << "Date = " << tmp_ind->getDate().getDateStr() << endl;
+      if (tmp_ind->hasSequences())
+      {
+        size_t nbss = tmp_ind->getNumberOfSequences();
+        os << "SequenceData = {" << endl;
+        for (size_t k = 0; k < nbss; k++)
+        {
+          try
+          {
+            // Only probes whether a sequence exists at position k.
+            tmp_ind->getSequenceAtPosition(k);
+            os << TextTools::toString(seqcpt++);
+          }
+          catch (const SequenceNotFoundException&)
+          {
+            os << getMissingDataChar();
+          }
+          if (k < nbss - 1)
+            os << separator;
+          else
+            os << endl;
+        }
+        os << "}" << endl;
+      }
+      if (tmp_ind->hasGenotype())
+      {
+        const MultilocusGenotype& tmp_genotype = tmp_ind->getGenotype();
+        // output[k][0] and output[k][1]: the two allele ids of locus k.
+        vector<vector<string> > output(tmp_genotype.size());
+        os << "AllelicData = {" << endl;
+        for (size_t k = 0; k < tmp_genotype.size(); k++)
+        {
+          output[k].resize(2);
+          if (tmp_genotype.isMonolocusGenotypeMissing(k))
+          {
+            output[k][0] = getMissingDataChar();
+            output[k][1] = getMissingDataChar();
+          }
+          else
+          {
+            vector<size_t> tmp_all_ind = tmp_genotype.getMonolocusGenotype(k).getAlleleIndex();
+            output[k][0] = data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[0]).getId();
+            // A single allele: the second line gets the missing data symbol.
+            if (tmp_all_ind.size() > 1)
+              output[k][1] = data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[1]).getId();
+            else
+              output[k][1] = getMissingDataChar();
+          }
+        }
+        // First alleles on one line, second alleles on the next one.
+        for (size_t line = 0; line < 2; line++)
+        {
+          for (size_t k = 0; k < output.size(); k++)
+          {
+            os << output[k][line];
+            if (k < output.size() - 1)
+              os << separator;
+            else
+              os << endl;
+          }
+        }
+        os << "}" << endl;
+      }
+    }
+  }
+}
+
+// Write to a file path: forwarded unchanged to AbstractODataSet::write().
+void PopgenlibIO::write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception)
+{
+  this->AbstractODataSet::write(path, data_set, overwrite);
+}
+
+/**
+ * Extract the values of a "key = v1 v2 ..." line.
+ *
+ * Everything up to and including the first occurrence of delim is removed
+ * from param_line (nothing is removed when delim is absent or empty), the
+ * surrounding white space is stripped, and the remainder is split on the
+ * data separator character. Two consecutive separators, or a separator in
+ * first position, give an empty value.
+ *
+ * @param param_line Line to parse; it is modified in place and holds the
+ *                   stripped value part on return.
+ * @param delim      Text separating the key from the values.
+ * @return The values, always at least one element.
+ */
+std::vector<std::string> PopgenlibIO::getValues_(std::string& param_line, const std::string& delim)
+{
+  // find() reports "not found" with npos; being unsigned, the former tests
+  // "limit >= 0" and "bs > 0" were true for npos as well and led to strings
+  // built from invalid iterator ranges.
+  const size_t limit = param_line.find(delim, 0);
+  if (limit != string::npos)
+    param_line.erase(0, limit + delim.size());
+  param_line = TextTools::removeSurroundingWhiteSpaces(param_line);
+
+  vector<string> values;
+  const char separator = getDataSeparatorChar();
+  size_t bi = 0;
+  size_t bs = param_line.find(separator, bi);
+  while (bs != string::npos)
+  {
+    values.push_back(param_line.substr(bi, bs - bi));
+    bi = bs + 1;
+    bs = param_line.find(separator, bi);
+  }
+  // Last (or only) value.
+  values.push_back(param_line.substr(bi));
+  return values;
+}
+
diff --git a/src/Bpp/PopGen/PopgenlibIO.h b/src/Bpp/PopGen/PopgenlibIO.h
new file mode 100644
index 0000000..c68cd01
--- /dev/null
+++ b/src/Bpp/PopGen/PopgenlibIO.h
@@ -0,0 +1,190 @@
+//
+// File PopgenlibIO.h
+// Created by: Sylvain Gaillard
+// Created on: Thursday July 29 2004
+//
+
+/*
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#ifndef _POPULIBIO_H_
+#define _POPULIBIO_H_
+
+#include <Bpp/Exceptions.h>
+#include <Bpp/Text/TextTools.h>
+#include <Bpp/Io/FileTools.h>
+
+// From Seq
+#include <Bpp/Seq/Io/Fasta.h>
+#include <Bpp/Seq/Container/VectorSequenceContainer.h>
+
+// From local Pop
+#include "AbstractIDataSet.h"
+#include "AbstractODataSet.h"
+#include "BasicAlleleInfo.h"
+
+namespace bpp
+{
+/**
+ * @brief The native I/O format for popgenlib.
+ *
+ * The format is organised in sections introduced by the headers [General],
+ * [Localities], [Sequences], [Loci] and [Individuals]. Values on a data line
+ * are separated by a configurable separator character and missing values
+ * are coded by a configurable symbol.
+ *
+ * @author Sylvain Gaillard
+ */
+class PopgenlibIO :
+ public AbstractIDataSet,
+ public AbstractODataSet
+{
+public:
+ // Constants: symbolic codes for the most common data separators.
+ static const std::string WHITESPACE;
+ static const std::string TAB;
+ static const std::string COMA;
+ static const std::string SEMICOLON;
+
+ // Constants: ploidy keywords used in the [Loci] section.
+ static const std::string DIPLOID;
+ static const std::string HAPLOID;
+ static const std::string HAPLODIPLOID;
+ static const std::string UNKNOWN;
+
+private:
+ // Character separating values on a data line (default ' ').
+ char data_separator_;
+ // Character coding a missing value (default '$').
+ char missing_data_symbol_;
+
+ // Removes param_line up to and including the first delim, strips the
+ // surrounding white space and splits the rest on the data separator.
+ // param_line is modified in place.
+ std::vector<std::string> getValues_(std::string& param_line, const std::string& delim);
+ // Applies the MissingData, DataSeparator and SequenceType settings.
+ void parseGeneral_(const std::vector<std::string>& in, DataSet& data_set);
+ // Builds one locality (name, coordinates) and adds it to data_set.
+ void parseLocality_(const std::vector<std::string>& in, DataSet& data_set);
+ // Reads the buffered lines as Fasta into vsc.
+ void parseSequence_(const std::vector<std::string>& in, VectorSequenceContainer& vsc);
+ // Builds one LocusInfo (name, ploidy) and appends it to locus_info.
+ void parseLoci_(const std::vector<std::string>& in, std::vector<LocusInfo>& locus_info);
+ // Builds one individual and adds it to its group in data_set; sequences
+ // referenced by the individual are looked up in vsc.
+ void parseIndividual_(const std::vector<std::string>& in, DataSet& data_set, const VectorSequenceContainer& vsc);
+
+public:
+ // Constructor and destructor
+ PopgenlibIO();
+ PopgenlibIO(const std::string& missing_data_symbol, const std::string& data_separator) throw (Exception);
+ ~PopgenlibIO();
+
+public:
+ /**
+ * @brief Get the code for missing data.
+ */
+ std::string getMissingDataSymbol() const;
+
+ /**
+ * @brief Get the code for data separator.
+ *
+ * Returns "WHITESPACE", "TAB", "COMA" or "SEMICOLON" for the matching
+ * characters, otherwise the separator character itself as a string.
+ */
+ std::string getDataSeparator() const;
+
+ /**
+ * @brief Get the character for missing data.
+ */
+ char getMissingDataChar() const;
+
+ /**
+ * @brief Get the data separator char.
+ */
+ char getDataSeparatorChar() const;
+
+ /**
+ * @brief Set the code for missing data.
+ *
+ * The character used to code missing data can be any single non-numerical
+ * character, but it can't be the character used as data separator, a white
+ * space or a tabulation.
+ * The default value is '$'.
+ *
+ * @throw Exception if missing_data_symbol is a not allowed character.
+ * @throw Exception if missing_data_symbol contains more than one character.
+ */
+ void setMissingDataSymbol(const std::string& missing_data_symbol)
+ throw (Exception);
+
+ /**
+ * @brief Set the code for data separator.
+ *
+ * The character used to separate data can be any single non-numerical
+ * character, but it can't be the character used for coding missing data.
+ * Most common characters used are:
+ * <ul><li>the white space: "WHITESPACE"</li>
+ * <li>the tabulation: "TAB"</li>
+ * <li>the comma: "COMA"</li>
+ * <li>the semicolon: "SEMICOLON"</li></ul>
+ * The default value is "WHITESPACE".
+ *
+ * @throw Exception if data_separator is a not allowed character.
+ * @throw Exception if data_separator contains more than one character and is not one of the codes listed above.
+ */
+ void setDataSeparator(const std::string& data_separator)
+ throw (Exception);
+
+ /**
+ * @name The IDataSet interface.
+ * @{
+ */
+ void read(std::istream& is, DataSet& data_set) throw (Exception);
+ void read(const std::string& path, DataSet& data_set) throw (Exception);
+ DataSet* read(std::istream& is) throw (Exception);
+ DataSet* read(const std::string& path) throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The ODataSet interface.
+ * @{
+ */
+ void write(std::ostream& os, const DataSet& data_set) const throw (Exception);
+
+ void write(const std::string& path, const DataSet& data_set, bool overwrite) const throw (Exception);
+ /**
+ * @}
+ */
+
+ /**
+ * @name The IOFormat interface
+ * @{
+ */
+ const std::string getFormatName() const
+ {
+ return "PopgenlibIO ver 0.1";
+ }
+ const std::string getFormatDescription() const
+ {
+ return "IO format used to store DataSets inspired from Arlequin and Fasta";
+ }
+ /**
+ * @}
+ */
+};
+} // end of namespace bpp;
+
+#endif // _POPULIBIO_H_
diff --git a/src/Bpp/PopGen/SequenceStatistics.cpp b/src/Bpp/PopGen/SequenceStatistics.cpp
new file mode 100644
index 0000000..b2dcef5
--- /dev/null
+++ b/src/Bpp/PopGen/SequenceStatistics.cpp
@@ -0,0 +1,1763 @@
+//
+// File SequenceStatistics.cpp
+// Authors: Eric Bazin
+// Sylvain Gaillard
+// Khalid Belkhir
+// Benoit Nabholz
+// Created on: Wed Aug 04 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "SequenceStatistics.h" // class's header file
+#include "PolymorphismSequenceContainerTools.h"
+#include "PolymorphismSequenceContainer.h"
+
+// From the STL:
+#include <ctype.h>
+#include <cmath>
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+// From SeqLib:
+#include <Bpp/Seq/Site.h>
+#include <Bpp/Seq/SiteTools.h>
+#include <Bpp/Seq/StringSequenceTools.h>
+#include <Bpp/Seq/CodonSiteTools.h>
+#include <Bpp/Seq/Alphabet/DNA.h>
+#include <Bpp/Seq/Alphabet/StandardCodonAlphabet.h>
+#include <Bpp/Seq/GeneticCode/StandardGeneticCode.h>
+
+#include <Bpp/Numeric/VectorTools.h>
+#include <Bpp/Numeric/VectorExceptions.h>
+
+using namespace bpp;
+
+// ******************************************************************************
+// Basic statistics
+// ******************************************************************************
+
+// Count the sites for which SiteTools::isConstant() is false.
+// gapflag selects a CompleteSiteContainerIterator instead of a
+// SimpleSiteContainerIterator; ignoreUnknown is forwarded to isConstant().
+size_t SequenceStatistics::polymorphicSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown)
+{
+  ConstSiteIterator* sites = 0;
+  if (gapflag)
+    sites = new CompleteSiteContainerIterator(psc);
+  else
+    sites = new SimpleSiteContainerIterator(psc);
+
+  size_t count = 0;
+  while (sites->hasMoreSites())
+  {
+    const Site* current = sites->nextSite();
+    if (SiteTools::isConstant(*current, ignoreUnknown))
+      continue;
+    ++count;
+  }
+  delete sites;
+  return count;
+}
+
+// Count the sites accepted by SiteTools::isParsimonyInformativeSite().
+// gapflag selects a CompleteSiteContainerIterator instead of a
+// SimpleSiteContainerIterator.
+size_t SequenceStatistics::parsimonyInformativeSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* sites = 0;
+  if (gapflag)
+    sites = new CompleteSiteContainerIterator(psc);
+  else
+    sites = new SimpleSiteContainerIterator(psc);
+
+  size_t count = 0;
+  while (sites->hasMoreSites())
+  {
+    const Site* current = sites->nextSite();
+    if (SiteTools::isParsimonyInformativeSite(*current))
+      ++count;
+  }
+  delete sites;
+  return count;
+}
+
+// Sum getSingletonNumber_() over all visited sites.
+// gapflag selects a CompleteSiteContainerIterator instead of a
+// SimpleSiteContainerIterator.
+size_t SequenceStatistics::countSingleton(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* sites = 0;
+  if (gapflag)
+    sites = new CompleteSiteContainerIterator(psc);
+  else
+    sites = new SimpleSiteContainerIterator(psc);
+
+  size_t total = 0;
+  while (sites->hasMoreSites())
+  {
+    const Site* current = sites->nextSite();
+    total += getSingletonNumber_(*current);
+  }
+  delete sites;
+  return total;
+}
+
+// Count the sites accepted by SiteTools::isTriplet().
+// gapflag selects a CompleteSiteContainerIterator instead of a
+// SimpleSiteContainerIterator.
+size_t SequenceStatistics::tripletNumber(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* si = 0;
+  if (gapflag)
+    si = new CompleteSiteContainerIterator(psc);
+  else
+    si = new SimpleSiteContainerIterator(psc);
+  // The counter has the type of the return value, like in the other counting
+  // functions of this file; it used to be an int converted on return.
+  size_t S = 0;
+  const Site* site = 0;
+  while (si->hasMoreSites())
+  {
+    site = si->nextSite();
+    if (SiteTools::isTriplet(*site))
+    {
+      S++;
+    }
+  }
+
+  delete si;
+  return S;
+}
+
+// Sum getMutationNumber_() over all visited sites.
+// gapflag selects a CompleteSiteContainerIterator instead of a
+// SimpleSiteContainerIterator.
+size_t SequenceStatistics::totNumberMutations(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* sites = 0;
+  if (gapflag)
+    sites = new CompleteSiteContainerIterator(psc);
+  else
+    sites = new SimpleSiteContainerIterator(psc);
+
+  size_t total = 0;
+  while (sites->hasMoreSites())
+  {
+    const Site* current = sites->nextSite();
+    total += getMutationNumber_(*current);
+  }
+  delete sites;
+  return total;
+}
+
+// Walk the ingroup and the outgroup site by site and sum
+// getDerivedSingletonNumber_() over the positions where both sites are
+// complete according to SiteTools::isComplete().
+// Throws Exception when the two containers differ in number of sites.
+size_t SequenceStatistics::totMutationsExternalBranchs(
+  const PolymorphismSequenceContainer& ing,
+  const PolymorphismSequenceContainer& outg) throw (Exception)
+{
+  if (ing.getNumberOfSites() != outg.getNumberOfSites())
+    throw Exception("ing and outg must have the same size");
+
+  ConstSiteIterator* in_sites = new SimpleSiteContainerIterator(ing);
+  ConstSiteIterator* out_sites = new SimpleSiteContainerIterator(outg);
+
+  size_t total = 0;
+  while (in_sites->hasMoreSites())
+  {
+    const Site* in_site = in_sites->nextSite();
+    const Site* out_site = out_sites->nextSite();
+    // use fully resolved sites only
+    if (!SiteTools::isComplete(*in_site) || !SiteTools::isComplete(*out_site))
+      continue;
+    // singletons that are not in outgroup
+    total += getDerivedSingletonNumber_(*in_site, *out_site);
+  }
+  delete in_sites;
+  delete out_sites;
+  return total;
+}
+
+// Sum of SiteTools::heterozygosity() over the visited sites of psc.
+// gapflag selects the iterator: CompleteSiteContainerIterator when true,
+// SimpleSiteContainerIterator when false.
+double SequenceStatistics::heterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* siteIt = gapflag
+    ? static_cast<ConstSiteIterator*>(new CompleteSiteContainerIterator(psc))
+    : static_cast<ConstSiteIterator*>(new SimpleSiteContainerIterator(psc));
+  double total = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    total += SiteTools::heterozygosity(*current);
+  }
+  delete siteIt;
+  return total;
+}
+
+// Sum of the squared SiteTools::heterozygosity() over the visited sites of psc.
+// gapflag selects the iterator: CompleteSiteContainerIterator when true,
+// SimpleSiteContainerIterator when false.
+double SequenceStatistics::squaredHeterozygosity(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  ConstSiteIterator* siteIt = gapflag
+    ? static_cast<ConstSiteIterator*>(new CompleteSiteContainerIterator(psc))
+    : static_cast<ConstSiteIterator*>(new SimpleSiteContainerIterator(psc));
+  double total = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    const double het = SiteTools::heterozygosity(*current);
+    total += het * het;
+  }
+  delete siteIt;
+  return total;
+}
+
+// ******************************************************************************
+// GC statistics
+// ******************************************************************************
+
+// Ratio (f(C) + f(G)) / (f(A) + f(C) + f(G) + f(T)), where f() are the
+// frequencies filled in by SequenceContainerTools::getFrequencies() for psc and
+// the letters are mapped to states with the container's alphabet.
+double SequenceStatistics::gcContent(const PolymorphismSequenceContainer& psc)
+{
+  map<int, double> freqs;
+  SequenceContainerTools::getFrequencies(psc, freqs);
+  const Alphabet* alphabet = psc.getAlphabet();
+  // operator[] yields 0 for a state that was never observed.
+  const double fC = freqs[alphabet->charToInt("C")];
+  const double fG = freqs[alphabet->charToInt("G")];
+  const double fA = freqs[alphabet->charToInt("A")];
+  const double fT = freqs[alphabet->charToInt("T")];
+  return (fC + fG) / (fA + fC + fG + fT);
+}
+
+// Return a two-element vector {nbMut, nbGC} accumulated over the non-constant
+// sites whose GC content (SymbolListTools::getGCContent) lies strictly between
+// 0 and 1:
+//   - nbMut grows by the number of sequences for each such site;
+//   - nbGC grows by (GC content * number of sequences), truncated to size_t.
+// Sites are visited with a CompleteSiteContainerIterator when gapflag is true
+// and with a NoGapSiteContainerIterator otherwise.
+std::vector<size_t> SequenceStatistics::gcPolymorphism(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+ size_t nbMut = 0;
+ size_t nbGC = 0;
+ const size_t nbSeq = psc.getNumberOfSequences();
+ vector<size_t> vect(2);
+ const Site* site = 0;
+ ConstSiteIterator* si = 0;
+ if (gapflag)
+ si = new CompleteSiteContainerIterator(psc);
+ else
+ si = new NoGapSiteContainerIterator(psc);
+ while (si->hasMoreSites())
+ {
+ site = si->nextSite();
+ if (!SiteTools::isConstant(*site))
+ {
+ long double freqGC = SymbolListTools::getGCContent(*site);
+ /*
+ * Sylvain Gaillard 15/03/2010: really unclear ...
+ * freqGC is always in [0,1], so why test it?
+ * why cast a double into size_t?
+ * is this method used by anyone?
+ */
+ if (freqGC > 0 && freqGC < 1)
+ {
+ nbMut += static_cast<size_t>(nbSeq);
+ long double adGC = freqGC * nbSeq;
+ nbGC += static_cast<size_t>(adGC); // truncates toward zero
+ }
+ }
+ }
+ vect[0] = nbMut;
+ vect[1] = nbGC;
+ delete si;
+ return vect;
+}
+
+// ******************************************************************************
+// Diversity statistics
+// ******************************************************************************
+
+// Number of polymorphic sites (polymorphicSiteNumber) divided by the "a1"
+// coefficient that getUsefullValues_() returns for the number of sequences.
+double SequenceStatistics::watterson75(const PolymorphismSequenceContainer& psc, bool gapflag, bool ignoreUnknown)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  const size_t segregating = polymorphicSiteNumber(psc, gapflag, ignoreUnknown);
+  map<string, double> coeffs = getUsefullValues_(nbSeq);
+  return static_cast<double>(segregating) / coeffs["a1"];
+}
+
+// For every non-constant visited site, add 1 - sum_k [ c_k (c_k - 1) / (n (n - 1)) ]
+// to the result, where c_k is the count of state k and n the total count, both
+// restricted to states in [0, alphabet size). Sites with fewer than two such
+// states are skipped.
+// gapflag selects the iterator: CompleteSiteContainerIterator when true,
+// SimpleSiteContainerIterator when false.
+double SequenceStatistics::tajima83(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+ size_t alphabet_size = (psc.getAlphabet())->getSize();
+ const Site* site = 0;
+ ConstSiteIterator* si = 0;
+ double value2 = 0.; // running total over sites
+ if (gapflag)
+ si = new CompleteSiteContainerIterator(psc);
+ else
+ si = new SimpleSiteContainerIterator(psc);
+ while (si->hasMoreSites())
+ {
+ site = si->nextSite();
+ if (!SiteTools::isConstant(*site))
+ {
+ double value = 0.;
+ map<int, size_t> count;
+ SymbolListTools::getCounts(*site, count);
+ map<int, size_t> tmp_k; // c_k * (c_k - 1) per retained state
+ size_t tmp_n = 0; // number of retained observations at this site
+ for (map<int, size_t>::iterator it = count.begin(); it != count.end(); it++)
+ {
+ // Ignore states outside [0, alphabet_size).
+ if (it->first >= 0 && it->first < static_cast<int>(alphabet_size))
+ {
+ tmp_k[it->first] = it->second * (it->second - 1);
+ tmp_n += it->second;
+ }
+ }
+ // Avoid a zero denominator n * (n - 1).
+ if (tmp_n == 0 || tmp_n == 1)
+ continue;
+ for (map<int, size_t>::iterator it = tmp_k.begin(); it != tmp_k.end(); it++)
+ {
+ value += static_cast<double>(it->second) / static_cast<double>(tmp_n * (tmp_n - 1));
+ }
+ value2 += 1. - value;
+ }
+ }
+ delete si;
+ return value2;
+}
+
+// For every site that is not constant, or whose first-sequence character differs
+// from the ancestral character, add sum_k [ 2 c_k^2 / (n (n - 1)) ] over the
+// states k that differ from the ancestral state, where c_k is the count of
+// state k and n the total count of states in [0, alphabet size).
+// Sites with a negative ancestral state, or with fewer than two retained
+// observations, are skipped.
+// Throws Exception when ancestralSites and psc do not have the same length.
+double SequenceStatistics::FayWu2000(const PolymorphismSequenceContainer& psc, const Sequence& ancestralSites)
+{
+ if (psc.getNumberOfSites() != ancestralSites.size())
+ throw Exception("SequenceStatistics::FayWu2000: ancestralSites and psc don't have the same size!!!'" );
+
+ // First sequence of the sample, used to detect constant sites that differ from the ancestor.
+ const Sequence& tmps = psc.getSequence(0);
+
+ size_t alphabet_size = (psc.getAlphabet())->getSize();
+ double value = 0.;
+ for (size_t i = 0; i < psc.getNumberOfSites(); i++)
+ {
+ const Site& site = psc.getSite(i);
+ string ancB = ancestralSites.getChar(i);
+ int ancV = ancestralSites.getValue(i);
+
+ if (!SiteTools::isConstant(site) || tmps.getChar(i) != ancB)
+ {
+ // Negative ancestral state: skip the site.
+ if (ancV < 0)
+ continue;
+
+ map<int, size_t> count;
+ SymbolListTools::getCounts(site, count);
+ map<int, size_t> tmp_k; // 2 * c_k^2 per derived state
+ size_t tmp_n = 0; // number of retained observations at this site
+ for (map<int, size_t>::iterator it = count.begin(); it != count.end(); it++)
+ {
+ if (it->first >= 0 && it->first < static_cast<int>(alphabet_size))
+ {
+ /* if derived allele */
+ if (it->first != ancV)
+ {
+ tmp_k[it->first] = 2 * it->second * it->second;
+ }
+ tmp_n += it->second;
+ }
+ }
+ // Avoid a zero denominator n * (n - 1).
+ if (tmp_n == 0 || tmp_n == 1)
+ continue;
+ for (map<int, size_t>::iterator it = tmp_k.begin(); it != tmp_k.end(); it++)
+ {
+ value += static_cast<double>(it->second) / static_cast<double>(tmp_n * (tmp_n - 1));
+ }
+ }
+ }
+ return value;
+}
+
+// Number of distinct sequence strings (as returned by toString()) in the
+// container. When gapflag is true the comparison is made on the container
+// returned by PolymorphismSequenceContainerTools::getSitesWithoutGaps(),
+// otherwise on a plain copy of psc.
+size_t SequenceStatistics::DVK(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  /*
+   * Sylvain Gaillard 17/03/2010:
+   * This implementation makes an unneeded copy of the container and compares
+   * strings. It needs to be improved.
+   */
+  PolymorphismSequenceContainer* work = 0;
+  if (!gapflag)
+    work = new PolymorphismSequenceContainer(psc);
+  else
+    work = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc);
+  // The first sequence is always a new haplotype.
+  vector<string> haplotypes;
+  haplotypes.push_back(work->toString(0));
+  const size_t nbSeq = work->getNumberOfSequences();
+  for (size_t i = 1; i < nbSeq; ++i)
+  {
+    const string candidate = work->toString(i);
+    // Linear search among the haplotypes seen so far.
+    size_t k = 0;
+    while (k < haplotypes.size() && candidate.compare(haplotypes[k]) != 0)
+      ++k;
+    if (k == haplotypes.size())
+      haplotypes.push_back(candidate);
+  }
+  delete work;
+  return haplotypes.size();
+}
+
+// Return 1 - sum_h (e_h / N)^2, where e_h is the summed getSequenceCount() of
+// all sequences sharing the same string representation (toString()) and N is
+// the sum of getSequenceCount() over all sequences.
+// When gapflag is true the computation is made on the container returned by
+// PolymorphismSequenceContainerTools::getSitesWithoutGaps(), otherwise on a
+// copy of psc.
+double SequenceStatistics::DVH(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+ /*
+ * Sylvain Gaillard 17/03/2010:
+ * This implementation uses unneeded SequenceContainer recopy and works on
+ * string. It needs to be improved.
+ */
+ PolymorphismSequenceContainer* sc = 0;
+ if (gapflag)
+ sc = PolymorphismSequenceContainerTools::getSitesWithoutGaps(psc);
+ else
+ sc = new PolymorphismSequenceContainer(psc);
+ double H = 0.;
+ size_t nbSeq; // total of the sequence counts (N)
+ vector<string> pscvector; // distinct sequence strings seen so far
+ vector<size_t> effvector; // summed count for each distinct string (same index as pscvector)
+ pscvector.push_back(sc->toString(0));
+ effvector.push_back(sc->getSequenceCount(0));
+ nbSeq = sc->getSequenceCount(0);
+ for (size_t i = 1; i < sc->getNumberOfSequences(); i++)
+ {
+ nbSeq += sc->getSequenceCount(i);
+ bool uniq = true;
+ string query = sc->toString(i);
+ for (size_t j = 0; j < pscvector.size(); j++)
+ {
+ if (query.compare(pscvector[j]) == 0)
+ {
+ // Already known: add this sequence's count to it.
+ effvector[j] += sc->getSequenceCount(i);
+ uniq = false;
+ break;
+ }
+ }
+ if (uniq)
+ {
+ pscvector.push_back(query);
+ effvector.push_back(sc->getSequenceCount(i));
+ }
+ }
+ // H = 1 - sum of squared relative frequencies.
+ for (size_t i = 0; i < effvector.size(); i++)
+ {
+ H -= (static_cast<double>(effvector[i]) / static_cast<double>(nbSeq)) * ( static_cast<double>(effvector[i]) / static_cast<double>(nbSeq));
+ }
+ H += 1.;
+ delete sc;
+ return H;
+}
+
+// Count the sites (visited with a CompleteSiteContainerIterator) that carry
+// exactly two distinct characters whose integer states form the pair {0, 2}
+// or the pair {1, 3}.
+size_t SequenceStatistics::getNumberOfTransitions(const PolymorphismSequenceContainer& psc)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t transitions = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    // Only biallelic sites are considered.
+    if (SiteTools::getNumberOfDistinctCharacters(*current) != 2)
+      continue;
+    const vector<int> states = current->getContent();
+    const int a = states[0];
+    // b becomes the first state that differs from a.
+    int b = a;
+    for (size_t i = 1; i < states.size() && b == a; ++i)
+      b = states[i];
+    const bool pair02 = (a == 0 && b == 2) || (a == 2 && b == 0);
+    const bool pair13 = (a == 1 && b == 3) || (a == 3 && b == 1);
+    if (pair02 || pair13)
+      ++transitions;
+  }
+  delete siteIt;
+  return transitions;
+}
+
+// Count the sites (visited with a CompleteSiteContainerIterator) that carry
+// exactly two distinct characters whose integer states form neither the pair
+// {0, 2} nor the pair {1, 3}.
+size_t SequenceStatistics::getNumberOfTransversions(const PolymorphismSequenceContainer& psc)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t transversions = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    // Only biallelic sites are considered.
+    if (SiteTools::getNumberOfDistinctCharacters(*current) != 2)
+      continue;
+    const vector<int> states = current->getContent();
+    const int a = states[0];
+    // b becomes the first state that differs from a.
+    int b = a;
+    for (size_t i = 1; i < states.size() && b == a; ++i)
+      b = states[i];
+    const bool pair02 = (a == 0 && b == 2) || (a == 2 && b == 0);
+    const bool pair13 = (a == 1 && b == 3) || (a == 3 && b == 1);
+    if (!pair02 && !pair13)
+      ++transversions;
+  }
+  delete siteIt;
+  return transversions;
+}
+
+// Ratio of two counts taken over the sites (visited with a
+// CompleteSiteContainerIterator) that have exactly two distinct states: sites
+// whose states are {0, 2} or {1, 3} go to the numerator, all other two-state
+// sites go to the denominator.
+// Throws ZeroDivisionException when the denominator is zero.
+double SequenceStatistics::getTransitionsTransversionsRatio(const PolymorphismSequenceContainer& psc) throw (Exception)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t transitions = 0;
+  size_t transversions = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    map<int, size_t> counts;
+    const Site* current = siteIt->nextSite();
+    SymbolListTools::getCounts(*current, counts);
+    if (counts.size() != 2)
+      continue;
+    // std::map iterates its keys in increasing order, so lower < upper.
+    map<int, size_t>::const_iterator key = counts.begin();
+    const int lower = key->first;
+    ++key;
+    const int upper = key->first;
+    if ((lower == 0 && upper == 2) || (lower == 1 && upper == 3))
+      ++transitions;
+    else
+      ++transversions;
+  }
+  delete siteIt;
+  if (transversions == 0)
+    throw ZeroDivisionException("SequenceStatistics::getTransitionsTransversionsRatio.");
+  return static_cast<double>(transitions) / static_cast<double>(transversions);
+}
+
+// ******************************************************************************
+// Synonymous and non-synonymous polymorphism
+// ******************************************************************************
+
+// Count the visited sites for which CodonSiteTools::hasStop() is true.
+// Sites are visited with a NoGapSiteContainerIterator when gapflag is true and
+// with a SimpleSiteContainerIterator otherwise.
+size_t SequenceStatistics::stopCodonSiteNumber(const PolymorphismSequenceContainer& psc, bool gapflag)
+{
+  /*
+   * Sylvain Gaillard 17/03/2010
+   * What if the Alphabet is not a codon alphabet?
+   */
+  ConstSiteIterator* siteIt = gapflag
+    ? static_cast<ConstSiteIterator*>(new NoGapSiteContainerIterator(psc))
+    : static_cast<ConstSiteIterator*>(new SimpleSiteContainerIterator(psc));
+  size_t withStop = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    if (CodonSiteTools::hasStop(*current))
+      ++withStop;
+  }
+  delete siteIt;
+  return withStop;
+}
+
+// Count the visited sites for which CodonSiteTools::isMonoSitePolymorphic() is
+// true. Iterator choice: CompleteSiteContainerIterator when stopflag is true;
+// otherwise NoGapSiteContainerIterator when gapflag is true, else
+// SimpleSiteContainerIterator.
+size_t SequenceStatistics::monoSitePolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, bool stopflag, bool gapflag)
+{
+  ConstSiteIterator* siteIt = 0;
+  if (stopflag)
+    siteIt = new CompleteSiteContainerIterator(psc);
+  else if (gapflag)
+    siteIt = new NoGapSiteContainerIterator(psc);
+  else
+    siteIt = new SimpleSiteContainerIterator(psc);
+  size_t found = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    if (CodonSiteTools::isMonoSitePolymorphic(*current))
+      ++found;
+  }
+  delete siteIt;
+  return found;
+}
+
+// Count the sites (visited with a CompleteSiteContainerIterator) for which
+// CodonSiteTools::isSynonymousPolymorphic() is true under genetic code gc.
+size_t SequenceStatistics::synonymousPolymorphicCodonNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t found = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    if (CodonSiteTools::isSynonymousPolymorphic(*current, gc))
+      ++found;
+  }
+  delete siteIt;
+  return found;
+}
+
+// synonymousSubstitutionsNumber(psc, gc) divided by the "a1" coefficient that
+// getUsefullValues_() returns for the number of sequences.
+double SequenceStatistics::watterson75Synonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  const size_t substitutions = synonymousSubstitutionsNumber(psc, gc);
+  map<string, double> coeffs = getUsefullValues_(nbSeq);
+  return static_cast<double>(substitutions) / coeffs["a1"];
+}
+
+// nonSynonymousSubstitutionsNumber(psc, gc) divided by the "a1" coefficient
+// that getUsefullValues_() returns for the number of sequences.
+double SequenceStatistics::watterson75NonSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc)
+{
+  const size_t nbSeq = psc.getNumberOfSequences();
+  const size_t substitutions = nonSynonymousSubstitutionsNumber(psc, gc);
+  map<string, double> coeffs = getUsefullValues_(nbSeq);
+  return static_cast<double>(substitutions) / coeffs["a1"];
+}
+
+// Sum of CodonSiteTools::piSynonymous(site, gc, minchange) over the sites
+// visited with a CompleteSiteContainerIterator.
+double SequenceStatistics::piSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  double total = 0.;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    total += CodonSiteTools::piSynonymous(*current, gc, minchange);
+  }
+  delete siteIt;
+  return total;
+}
+
+// Sum of CodonSiteTools::piNonSynonymous(site, gc, minchange) over the sites
+// visited with a CompleteSiteContainerIterator.
+double SequenceStatistics::piNonSynonymous(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, bool minchange)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  double total = 0.;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    total += CodonSiteTools::piNonSynonymous(*current, gc, minchange);
+  }
+  delete siteIt;
+  return total;
+}
+
+// Sum of CodonSiteTools::meanNumberOfSynonymousPositions(site, gc, ratio) over
+// the sites visited with a CompleteSiteContainerIterator.
+double SequenceStatistics::meanSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  double total = 0.;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    total += CodonSiteTools::meanNumberOfSynonymousPositions(*current, gc, ratio);
+  }
+  delete siteIt;
+  return total;
+}
+
+// Three times the number of sites visited with a CompleteSiteContainerIterator,
+// minus the sum of CodonSiteTools::meanNumberOfSynonymousPositions(site, gc,
+// ratio) over those sites.
+double SequenceStatistics::meanNonSynonymousSitesNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double ratio)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  double synonymous = 0.;
+  int positions = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    positions += 3; // three positions per visited site
+    synonymous += CodonSiteTools::meanNumberOfSynonymousPositions(*current, gc, ratio);
+  }
+  delete siteIt;
+  return static_cast<double>(positions - synonymous);
+}
+
+// Over the sites visited with a CompleteSiteContainerIterator: the sum of
+// CodonSiteTools::numberOfSubsitutions(site, freqmin) minus the sum of
+// CodonSiteTools::numberOfNonSynonymousSubstitutions(site, gc, freqmin).
+size_t SequenceStatistics::synonymousSubstitutionsNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t allSubs = 0;
+  size_t nonSynSubs = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    allSubs += CodonSiteTools::numberOfSubsitutions(*current, freqmin);
+    nonSynSubs += CodonSiteTools::numberOfNonSynonymousSubstitutions(*current, gc, freqmin);
+  }
+  delete siteIt;
+  return allSubs - nonSynSubs;
+}
+
+// Sum of CodonSiteTools::numberOfNonSynonymousSubstitutions(site, gc, freqmin)
+// over the sites visited with a CompleteSiteContainerIterator.
+size_t SequenceStatistics::nonSynonymousSubstitutionsNumber(const PolymorphismSequenceContainer& psc, const GeneticCode& gc, double freqmin)
+{
+  ConstSiteIterator* siteIt = new CompleteSiteContainerIterator(psc);
+  size_t nonSynSubs = 0;
+  for ( ; siteIt->hasMoreSites(); )
+  {
+    const Site* current = siteIt->nextSite();
+    nonSynSubs += CodonSiteTools::numberOfNonSynonymousSubstitutions(*current, gc, freqmin);
+  }
+  delete siteIt;
+  return nonSynSubs;
+}
+
+// Walk pscin, pscout and psccons in parallel (CompleteSiteContainerIterator on
+// each) and add up, element by element, the two-element vectors returned by
+// CodonSiteTools::fixedDifferences() for each site triple. The first two
+// values of each psccons site are passed as the consensus states.
+// Returns {sum of element 0, sum of element 1}.
+vector<size_t> SequenceStatistics::fixedDifferences(const PolymorphismSequenceContainer& pscin, const PolymorphismSequenceContainer& pscout, PolymorphismSequenceContainer& psccons, const GeneticCode& gc)
+{
+  ConstSiteIterator* inIt = new CompleteSiteContainerIterator(pscin);
+  ConstSiteIterator* outIt = new CompleteSiteContainerIterator(pscout);
+  ConstSiteIterator* consIt = new CompleteSiteContainerIterator(psccons);
+  vector<size_t> totals(2); // both elements start at zero
+  for ( ; inIt->hasMoreSites(); )
+  {
+    const Site* inSite = inIt->nextSite();
+    const Site* outSite = outIt->nextSite();
+    const Site* consSite = consIt->nextSite();
+    const vector<size_t> perSite = CodonSiteTools::fixedDifferences(*inSite, *outSite, consSite->getValue(0), consSite->getValue(1), gc);
+    totals[0] += perSite[0];
+    totals[1] += perSite[1];
+  }
+  delete inIt;
+  delete outIt;
+  delete consIt;
+  return totals;
+}
+
+// Build a four-element table from an ingroup and an outgroup:
+//   [0] nonSynonymousSubstitutionsNumber() of the ingroup,
+//   [1] synonymousSubstitutionsNumber() of the ingroup,
+//   [2] element 1 of fixedDifferences(),
+//   [3] element 0 of fixedDifferences().
+// Both groups are first merged, reduced with getCompleteSites(), and split
+// again; fixedDifferences() is called with the two consensus sequences.
+vector<size_t> SequenceStatistics::MKtable(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin)
+{
+  // Merge both groups, flagging the appended sequences as outgroup members.
+  PolymorphismSequenceContainer merged(ingroup);
+  const size_t nbIn = ingroup.getNumberOfSequences();
+  const size_t nbOut = outgroup.getNumberOfSequences();
+  for (size_t k = 0; k < nbOut; ++k)
+  {
+    merged.addSequence(outgroup.getSequence(k));
+    merged.setAsOutgroupMember(nbIn + k);
+  }
+  const PolymorphismSequenceContainer* complete = PolymorphismSequenceContainerTools::getCompleteSites(merged);
+  const PolymorphismSequenceContainer* inPart = PolymorphismSequenceContainerTools::extractIngroup(*complete);
+  const PolymorphismSequenceContainer* outPart = PolymorphismSequenceContainerTools::extractOutgroup(*complete);
+  const Sequence* inConsensus = SiteContainerTools::getConsensus(*inPart, "consensusIn");
+  const Sequence* outConsensus = SiteContainerTools::getConsensus(*outPart, "consensusOut");
+  PolymorphismSequenceContainer* bothConsensus = new PolymorphismSequenceContainer(ingroup.getAlphabet());
+  bothConsensus->addSequence(*inConsensus);
+  bothConsensus->addSequence(*outConsensus);
+  const vector<size_t> fixed = SequenceStatistics::fixedDifferences(*inPart, *outPart, *bothConsensus, gc);
+  vector<size_t> table(4);
+  table[0] = SequenceStatistics::nonSynonymousSubstitutionsNumber(*inPart, gc, freqmin);
+  table[1] = SequenceStatistics::synonymousSubstitutionsNumber(*inPart, gc, freqmin);
+  table[2] = fixed[1];
+  table[3] = fixed[0];
+  // delete accepts null pointers, so no guards are needed.
+  delete bothConsensus;
+  delete complete;
+  delete inPart;
+  delete outPart;
+  delete inConsensus;
+  delete outConsensus;
+  return table;
+}
+
+// With t = MKtable(ingroup, outgroup, gc, freqmin), return
+// (t[0] * t[3]) / (t[1] * t[2]), or -1 when t[1] or t[2] is zero.
+double SequenceStatistics::neutralityIndex(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, const GeneticCode& gc, double freqmin)
+{
+  const vector<size_t> table = SequenceStatistics::MKtable(ingroup, outgroup, gc, freqmin);
+  // A zero factor in the denominator makes the index undefined.
+  if (table[1] == 0 || table[2] == 0)
+    return -1;
+  return static_cast<double>(table[0] * table[3]) / static_cast<double>(table[1] * table[2]);
+}
+
+// ******************************************************************************
+// Statistical tests
+// ******************************************************************************
+
+// Return (tajima83 - watterson75) / sqrt(e1 * S + e2 * S * (S - 1)), where S is
+// polymorphicSiteNumber(psc, gapflag) and e1, e2 come from getUsefullValues_()
+// for the number of sequences.
+// Throws ZeroDivisionException when S is zero.
+double SequenceStatistics::tajimaDSS(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException)
+{
+  const double S = static_cast<double>(polymorphicSiteNumber(psc, gapflag));
+  if (!S)
+    throw ZeroDivisionException("S should not be null");
+  const double pi = tajima83(psc, gapflag);
+  const double thetaW = watterson75(psc, gapflag);
+  map<string, double> coeffs = getUsefullValues_(psc.getNumberOfSequences());
+  const double e1 = coeffs["e1"];
+  const double e2 = coeffs["e2"];
+  return (pi - thetaW) / sqrt((e1 * S) + (e2 * S * (S - 1)));
+}
+
+// Return (tajima83 - eta / a1) / sqrt(e1 * eta + e2 * eta * (eta - 1)), where
+// eta is totNumberMutations(psc, gapflag) and a1, e1, e2 come from
+// getUsefullValues_() for the number of sequences.
+// Throws ZeroDivisionException when eta is zero.
+double SequenceStatistics::tajimaDTNM(const PolymorphismSequenceContainer& psc, bool gapflag) throw (ZeroDivisionException)
+{
+  const double eta = static_cast<double>(totNumberMutations(psc, gapflag));
+  if (!eta)
+    throw ZeroDivisionException("eta should not be null");
+  const double pi = tajima83(psc, gapflag);
+  map<string, double> coeffs = getUsefullValues_(psc.getNumberOfSequences());
+  const double thetaEta = eta / coeffs["a1"];
+  const double e1 = coeffs["e1"];
+  const double e2 = coeffs["e2"];
+  return (pi - thetaEta) / sqrt((e1 * eta) + (e2 * eta * (eta - 1)));
+}
+
+// Return (eta - a1 * etae) / sqrt(uD * eta + vD * eta^2), where eta is
+// totNumberMutations(ingroup), a1 comes from getUsefullValues_(), vD and uD
+// from getVD_() and getUD_(), and etae is
+//   - countSingleton(outgroup) when original is true,
+//   - totMutationsExternalBranchs(ingroup, outgroup) otherwise.
+// Throws ZeroDivisionException when eta is zero.
+double SequenceStatistics::fuliD(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException)
+{
+  const size_t nbSeq = ingroup.getNumberOfSequences();
+  map<string, double> coeffs = getUsefullValues_(nbSeq);
+  const double a1 = coeffs["a1"];
+  const double vD = getVD_(nbSeq, a1, coeffs["a2"], coeffs["cn"]);
+  const double uD = getUD_(a1, vD);
+  const double eta = static_cast<double>(totNumberMutations(ingroup));
+  if (eta == 0.)
+    throw ZeroDivisionException("eta should not be null");
+  // The non-original variant was added by Khalid, 13/07/2005.
+  const double etae = original
+    ? static_cast<double>(countSingleton(outgroup))
+    : static_cast<double>(totMutationsExternalBranchs(ingroup, outgroup));
+  return (eta - (a1 * etae)) / sqrt((uD * eta) + (vD * eta * eta));
+}
+
+// Return ((n / (n - 1)) * eta - a1 * etas) / sqrt(uDs * eta + vDs * eta^2),
+// where n is the number of sequences, eta is totNumberMutations(group), etas is
+// countSingleton(group), a1 comes from getUsefullValues_(), and vDs, uDs from
+// getVDstar_() and getUDstar_().
+// Throws ZeroDivisionException when eta is zero.
+double SequenceStatistics::fuliDstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException)
+{
+  const size_t nbSeq = group.getNumberOfSequences();
+  const double n = static_cast<double>(nbSeq);
+  const double nOverNm1 = n / (n - 1.);
+  map<string, double> coeffs = getUsefullValues_(nbSeq);
+  const double a1 = coeffs["a1"];
+  const double vDs = getVDstar_(nbSeq, a1, coeffs["a2"], coeffs["dn"]);
+  const double uDs = getUDstar_(nbSeq, a1, vDs);
+  const double eta = static_cast<double>(totNumberMutations(group));
+  if (eta == 0.)
+    throw ZeroDivisionException("eta should not be null");
+  const double etas = static_cast<double>(countSingleton(group));
+  // Fu & Li 1993 form. The Simonsen et al. 1995 alternative, kept for reference:
+  //   ((eta / a1) - (etas * ((n - 1) / n))) / sqrt(uDs * eta + vDs * eta * eta)
+  return ((nOverNm1 * eta) - (a1 * etas)) / sqrt(uDs * eta + vDs * eta * eta);
+}
+
+// Return (pi - etae) / sqrt(uF * eta + vF * eta^2), where pi is
+// tajima83(ingroup, true), eta is totNumberMutations(ingroup), uF and vF are
+// built from the coefficients of getUsefullValues_(), and etae is
+//   - countSingleton(outgroup) when original is true,
+//   - totMutationsExternalBranchs(ingroup, outgroup) otherwise.
+// Throws ZeroDivisionException when eta is zero.
+double SequenceStatistics::fuliF(const PolymorphismSequenceContainer& ingroup, const PolymorphismSequenceContainer& outgroup, bool original) throw (ZeroDivisionException)
+{
+ size_t n = ingroup.getNumberOfSequences();
+ double nn = static_cast<double>(n); // n as a double, for the formulas below
+ map<string, double> values = getUsefullValues_(n);
+ double pi = tajima83(ingroup, true);
+ // Variance coefficients of the statistic.
+ double vF = (values["cn"] + values["b2"] - 2. / (nn - 1.)) / (pow(values["a1"], 2) + values["a2"]);
+ double uF = ((1. + values["b1"] - (4. * ((nn + 1.) / ((nn - 1.) * (nn - 1.)))) * (values["a1n"] - (2. * nn) / (nn + 1.))) / values["a1"]) - vF;
+ double eta = static_cast<double>(totNumberMutations(ingroup));
+ if (eta == 0.)
+ throw ZeroDivisionException("eta should not be null");
+ double etae = 0.;
+ if (original)
+ etae = static_cast<double>(countSingleton(outgroup));
+ else
+ etae = static_cast<double>(totMutationsExternalBranchs(ingroup, outgroup)); // added by Khalid 13/07/2005
+ return (pi - etae) / sqrt(uF * eta + vF * eta * eta);
+}
+
+// Return (pi - ((n - 1) / n) * etas) / sqrt(uFs * eta + vFs * eta^2), where n
+// is the number of sequences, pi is tajima83(group, true), eta is
+// totNumberMutations(group), etas is countSingleton(group), and uFs, vFs are
+// computed with the active "Simonsen et al. 1995" expressions below (the
+// "Fu & Li 1993" expressions are kept as comments only).
+// Throws ZeroDivisionException when eta is zero.
+double SequenceStatistics::fuliFstar(const PolymorphismSequenceContainer& group) throw (ZeroDivisionException)
+{
+ double n = static_cast<double>(group.getNumberOfSequences());
+ map<string, double> values = getUsefullValues_(group.getNumberOfSequences());
+ double pi = tajima83(group, true);
+
+ // Fu & Li 1993
+ // double vFs = (values["dn"] + values["b2"] - (2. / (nn - 1.)) * (4. * values["a2"] - 6. + 8. / nn)) / (pow(values["a1"], 2) + values["a2"]);
+ // double uFs = (((nn / (nn - 1.)) + values["b1"] - (4. / (nn * (nn - 1.))) + 2. * ((nn + 1.) / (pow((nn - 1.), 2))) * (values["a1n"] - 2. * nn / (nn + 1.))) / values["a1"]) - vFs;
+
+ // Simonsen et al. 1995
+ double vFs = (((2 * n * n * n + 110 * n * n - 255 * n + 153) / (9 * n * n * (n - 1))) + ((2 * (n - 1) * values["a1"]) / (n * n)) - 8 * values["a2"] / n) / (pow(values["a1"], 2) + values["a2"]);
+ double uFs = (((4 * n * n + 19 * n + 3 - 12 * (n + 1) * values["a1n"]) / (3 * n * (n - 1))) / values["a1"]) - vFs;
+ double eta = static_cast<double>(totNumberMutations(group));
+ if (eta == 0.)
+ throw ZeroDivisionException("eta should not be null");
+ double etas = static_cast<double>(countSingleton(group));
+ // Fu & Li 1993
+ // Simonsen et al. 1995
+ return (pi - ((n - 1.) / n * etas)) / sqrt(uFs * eta + vFs * eta * eta);
+}
+
+// Return 1 - meanPiIntra / piInter for the groups id1 and id2 of psc, where
+//   - meanPiIntra is the mean of tajima83(group, false) over the two groups;
+//   - piInter is the mean of SiteContainerTools::computeSimilarity() over all
+//     (sequence of group 1, sequence of group 2) pairs, multiplied by the
+//     number of sites of psc.
+double SequenceStatistics::FstHudson92(const PolymorphismSequenceContainer& psc, size_t id1, size_t id2)
+{
+  PolymorphismSequenceContainer* popA = PolymorphismSequenceContainerTools::extractGroup(psc, id1);
+  PolymorphismSequenceContainer* popB = PolymorphismSequenceContainerTools::extractGroup(psc, id2);
+  const double piA = SequenceStatistics::tajima83(*popA, false);
+  const double piB = SequenceStatistics::tajima83(*popB, false);
+  const double meanPiIntra = (piA + piB) / 2;
+  // One value per between-group pair of sequences.
+  vector<double> pairValues;
+  double nbPairs = 0;
+  const size_t sizeA = popA->getNumberOfSequences();
+  const size_t sizeB = popB->getNumberOfSequences();
+  for (size_t a = 0; a < sizeA; ++a)
+  {
+    const Sequence& seqA = popA->getSequence(a);
+    for (size_t b = 0; b < sizeB; ++b)
+    {
+      nbPairs++;
+      const Sequence& seqB = popB->getSequence(b);
+      pairValues.push_back(SiteContainerTools::computeSimilarity(seqA, seqB, true, "no gap", true));
+    }
+  }
+  const double piInter = (VectorTools::sum(pairValues) / nbPairs) * static_cast<double>(psc.getNumberOfSites());
+  const double fst = 1.0 - meanPiIntra / piInter;
+  delete popA;
+  delete popB;
+  return fst;
+}
+
+// ******************************************************************************
+// Linkage disequilibrium statistics
+// ******************************************************************************
+
+/**********************/
+/* Preliminary method */
+/**********************/
+
+// Build the container used by the linkage-disequilibrium statistics.
+//
+// Site selection: a site of psc is kept when it is complete, not constant and
+// not a triplet (SiteTools predicates); when keepsingleton is false, sites
+// for which SiteTools::hasSingleton() is true are discarded as well.
+//
+// Recoding: in each kept site, the state whose frequency is >= 0.5 is recoded
+// as 1 and every other state as 0. A site is dropped when the frequent state
+// has a frequency above 1 - freqmin or another state has a frequency below
+// freqmin.
+//
+// The caller owns the returned container and must delete it.
+//
+// The result is built on psc's own alphabet. It used to be built on a locally
+// allocated DNA alphabet that was deleted before returning, which left the
+// returned container with a dangling alphabet pointer.
+PolymorphismSequenceContainer* SequenceStatistics::generateLDContainer(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin)
+{
+  SiteSelection ss;
+  // Extract polymorphic sites with only two alleles.
+  for (size_t i = 0; i < psc.getNumberOfSites(); i++)
+  {
+    const Site& candidate = psc.getSite(i);
+    if (SiteTools::isComplete(candidate) && !SiteTools::isConstant(candidate) && !SiteTools::isTriplet(candidate)
+        && (keepsingleton || !SiteTools::hasSingleton(candidate)))
+    {
+      ss.push_back(i);
+    }
+  }
+
+  const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
+  PolymorphismSequenceContainer* ldpsc = new PolymorphismSequenceContainer(sc->getNumberOfSequences(), psc.getAlphabet());
+  // Assign 1 to the more frequent and 0 to the less frequent alleles.
+  for (size_t i = 0; i < sc->getNumberOfSites(); i++)
+  {
+    const Site& site = sc->getSite(i);
+    Site siteclone(site);
+    bool deletesite = false;
+    map<int, double> freqs;
+    SymbolListTools::getFrequencies(siteclone, freqs);
+    // Majority state; when two states are both at 0.5 the last one in key order wins.
+    int first = 0;
+    for (map<int, double>::iterator it = freqs.begin(); it != freqs.end(); it++)
+    {
+      if (it->second >= 0.5)
+        first = it->first;
+    }
+    for (size_t j = 0; j < sc->getNumberOfSequences(); j++)
+    {
+      // Always read the state from the untouched site, not from the clone being recoded.
+      const int state = site.getValue(j);
+      if (freqs[state] >= 0.5 && state == first)
+      {
+        if (freqs[state] <= 1 - freqmin)
+          siteclone.setElement(j, 1);
+        else
+          deletesite = true;
+      }
+      else
+      {
+        if (freqs[state] >= freqmin)
+          siteclone.setElement(j, 0);
+        else
+          deletesite = true;
+      }
+    }
+    if (!deletesite)
+      ldpsc->addSite(siteclone);
+  }
+  // The intermediate selection is no longer needed (it used to be leaked).
+  delete sc;
+  return ldpsc;
+}
+
+/*************************************/
+/* Pairwise LD and distance measures */
+/*************************************/
+
+// Return the distances, in site positions, between every pair (i < j) of
+// retained sites of psc, ordered (0,1), (0,2), ..., (1,2), ...
+//
+// A site is retained when it is complete, not constant and not a triplet
+// (SiteTools predicates), has no singleton unless keepsingleton is true, and
+// has no state with a frequency >= 1 - freqmin.
+//
+// Throws DimensionException when fewer than two sites are retained.
+//
+// Each retained site is recorded exactly once. The keepsingleton == false path
+// used to push the index before the frequency filter and again after it, which
+// produced duplicated positions and let filtered sites through.
+Vdouble SequenceStatistics::pairwiseDistances1(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  // Get the positions of the sites of interest.
+  SiteSelection ss;
+  for (size_t i = 0; i < psc.getNumberOfSites(); i++)
+  {
+    const Site& site = psc.getSite(i);
+    if (!SiteTools::isComplete(site) || SiteTools::isConstant(site) || SiteTools::isTriplet(site))
+      continue;
+    if (!keepsingleton && SiteTools::hasSingleton(site))
+      continue;
+    bool deletesite = false;
+    map<int, double> freqs;
+    SymbolListTools::getFrequencies(site, freqs);
+    for (int j = 0; j < static_cast<int>(site.getAlphabet()->getSize()); j++)
+    {
+      if (freqs[j] >= 1 - freqmin)
+        deletesite = true;
+    }
+    if (!deletesite)
+      ss.push_back(i);
+  }
+  // Compute the pairwise distances.
+  if (ss.size() < 2)
+    throw DimensionException("SequenceStatistics::pairwiseDistances1 : less than 2 sites are available", ss.size(), 2);
+  Vdouble dist;
+  for (size_t i = 0; i < ss.size() - 1; i++)
+  {
+    for (size_t j = i + 1; j < ss.size(); j++)
+    {
+      dist.push_back(static_cast<double>(ss[j] - ss[i]));
+    }
+  }
+  return dist;
+}
+
+// Return, for every pair (i < j) of retained sites of psc, the distance between
+// the two sites averaged over the sequences. For each sequence the site
+// positions are first shifted down by the number of positions holding state -1
+// that precede them in that sequence.
+//
+// A site is retained when it is complete, not constant and not a triplet
+// (SiteTools predicates), has no singleton unless keepsingleton is true, and
+// has no state with a frequency >= 1 - freqmin.
+//
+// Throws DimensionException when fewer than two sites are retained.
+//
+// Each retained site is recorded exactly once. The keepsingleton == false path
+// used to push the index before the frequency filter and again after it, which
+// produced duplicated positions and let filtered sites through. The exception
+// message now names this function instead of pairwiseDistances1.
+Vdouble SequenceStatistics::pairwiseDistances2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  SiteSelection ss;
+  for (size_t i = 0; i < psc.getNumberOfSites(); i++)
+  {
+    const Site& site = psc.getSite(i);
+    if (!SiteTools::isComplete(site) || SiteTools::isConstant(site) || SiteTools::isTriplet(site))
+      continue;
+    if (!keepsingleton && SiteTools::hasSingleton(site))
+      continue;
+    bool deletesite = false;
+    map<int, double> freqs;
+    SymbolListTools::getFrequencies(site, freqs);
+    for (int j = 0; j < static_cast<int>(site.getAlphabet()->getSize()); j++)
+    {
+      if (freqs[j] >= 1 - freqmin)
+        deletesite = true;
+    }
+    if (!deletesite)
+      ss.push_back(i);
+  }
+  size_t n = ss.size();
+  if (n < 2)
+    throw DimensionException("SequenceStatistics::pairwiseDistances2 : less than 2 sites are available", ss.size(), 2);
+  // One accumulator per site pair.
+  Vdouble distance(n * (n - 1) / 2, 0);
+  size_t nbsite = psc.getNumberOfSites();
+  for (size_t k = 0; k < psc.getNumberOfSequences(); k++)
+  {
+    const Sequence& seq = psc.getSequence(k);
+    SiteSelection gap, newss = ss;
+    Vdouble dist;
+    // Positions holding state -1 in this sequence.
+    for (size_t i = 0; i < nbsite; i++)
+    {
+      if (seq.getValue(i) == -1)
+        gap.push_back(i);
+    }
+    // Site positions are re-numbered to take gaps into account.
+    for (size_t i = 0; i < gap.size(); i++)
+    {
+      for (size_t j = 0; j < ss.size(); j++)
+      {
+        if (ss[j] > gap[i])
+          newss[j]--;
+      }
+    }
+    for (size_t i = 0; i < n - 1; i++)
+    {
+      for (size_t j = i + 1; j < n; j++)
+      {
+        dist.push_back(static_cast<double>(newss[j] - newss[i]));
+      }
+    }
+    distance += dist;
+  }
+  distance = distance / static_cast<double>(psc.getNumberOfSequences());
+  return distance;
+}
+
+// Return |h - p1 * p2| for every pair (i < j) of sites of the container built
+// by generateLDContainer(psc, keepsingleton, freqmin), where h is the fraction
+// of sequences coded 1 at both sites and p1, p2 are the frequencies of state 1
+// at each site.
+//
+// Throws DimensionException when that container has fewer than two sites or
+// fewer than two sequences.
+//
+// The intermediate container is released on every exit path (it used to be
+// leaked on return and on both throws).
+Vdouble SequenceStatistics::pairwiseD(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin);
+  Vdouble D;
+  size_t nbsite = newpsc->getNumberOfSites();
+  size_t nbseq = newpsc->getNumberOfSequences();
+  if (nbsite < 2)
+  {
+    delete newpsc;
+    throw DimensionException("SequenceStatistics::pairwiseD: less than two sites are available", nbsite, 2);
+  }
+  if (nbseq < 2)
+  {
+    delete newpsc;
+    throw DimensionException("SequenceStatistics::pairwiseD: less than two sequences are available", nbseq, 2);
+  }
+  for (size_t i = 0; i < nbsite - 1; i++)
+  {
+    for (size_t j = i + 1; j < nbsite; j++)
+    {
+      double haplo = 0;
+      const Site& site1 = newpsc->getSite(i);
+      const Site& site2 = newpsc->getSite(j);
+      map<int, double> freq1;
+      map<int, double> freq2;
+      SymbolListTools::getFrequencies(site1, freq1);
+      SymbolListTools::getFrequencies(site2, freq2);
+      // Sites are coded 0/1, so a sum of 2 means state 1 at both sites.
+      for (size_t k = 0; k < nbseq; k++)
+      {
+        if (site1.getValue(k) + site2.getValue(k) == 2)
+          haplo++;
+      }
+      haplo = haplo / static_cast<double>(nbseq);
+      D.push_back(std::abs(haplo - freq1[1] * freq2[1]));
+    }
+  }
+  delete newpsc;
+  return D;
+}
+
+/**
+ * Pairwise normalised linkage disequilibrium |D'| for every pair of sites
+ * (i < j) of the container built by generateLDContainer().
+ *
+ * D is computed as in pairwiseD() (without the absolute value) and |D| is then
+ * divided by the smaller of two frequency products chosen by the sign of D:
+ *  - D > 0: min(freq1[1] * freq2[0], freq1[0] * freq2[1])
+ *  - else : min(freq1[1] * freq2[1], freq1[0] * freq2[0])
+ * No protection against a zero denominator is applied.
+ *
+ * @throw DimensionException if fewer than two sites or two sequences are available.
+ */
+Vdouble SequenceStatistics::pairwiseDprime(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  // NOTE(review): assumes generateLDContainer() hands ownership of a heap
+  // allocated container to the caller; it is released on every exit below.
+  PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin);
+  const size_t nbsite = newpsc->getNumberOfSites();
+  const size_t nbseq = newpsc->getNumberOfSequences();
+  if (nbsite < 2 || nbseq < 2)
+  {
+    // Release the temporary container before reporting the error.
+    delete newpsc;
+    if (nbsite < 2)
+      throw DimensionException("SequenceStatistics::pairwiseDprime: less than two sites are available", nbsite, 2);
+    throw DimensionException("SequenceStatistics::pairwiseDprime: less than two sequences are available", nbseq, 2);
+  }
+  Vdouble Dprime;
+  for (size_t i = 0; i < nbsite - 1; i++)
+  {
+    for (size_t j = i + 1; j < nbsite; j++)
+    {
+      const Site& site1 = newpsc->getSite(i);
+      const Site& site2 = newpsc->getSite(j);
+      map<int, double> freq1;
+      map<int, double> freq2;
+      SymbolListTools::getFrequencies(site1, freq1);
+      SymbolListTools::getFrequencies(site2, freq2);
+      // Fraction of sequences for which the two site values sum to 2.
+      double haplo = 0;
+      for (size_t k = 0; k < nbseq; k++)
+      {
+        if (site1.getValue(k) + site2.getValue(k) == 2)
+          haplo++;
+      }
+      haplo = haplo / static_cast<double>(nbseq);
+      // operator[] yields 0 for a state that is absent from the site.
+      const double p0 = freq1[0];
+      const double p1 = freq1[1];
+      const double q0 = freq2[0];
+      const double q1 = freq2[1];
+      const double D = haplo - p1 * q1;
+      double dmax;
+      if (D > 0)
+        dmax = (p1 * q0 <= p0 * q1) ? p1 * q0 : p0 * q1;
+      else
+        dmax = (p1 * q1 <= p0 * q0) ? p1 * q1 : p0 * q0;
+      Dprime.push_back(std::abs(D) / dmax);
+    }
+  }
+  delete newpsc;
+  return Dprime;
+}
+
+/**
+ * Pairwise R^2 for every pair of sites (i < j) of the container built by
+ * generateLDContainer().
+ *
+ * With D = haplo - freq1[1] * freq2[1] (haplo being the fraction of sequences
+ * whose two site values sum to 2), the stored value is
+ * D^2 / (freq1[0] * freq1[1] * freq2[0] * freq2[1]).
+ * No protection against a zero denominator is applied.
+ *
+ * @throw DimensionException if fewer than two sites or two sequences are available.
+ */
+Vdouble SequenceStatistics::pairwiseR2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  // NOTE(review): assumes generateLDContainer() hands ownership of a heap
+  // allocated container to the caller; it is released on every exit below.
+  PolymorphismSequenceContainer* newpsc = SequenceStatistics::generateLDContainer(psc, keepsingleton, freqmin);
+  const size_t nbsite = newpsc->getNumberOfSites();
+  const size_t nbseq = newpsc->getNumberOfSequences();
+  if (nbsite < 2 || nbseq < 2)
+  {
+    // Release the temporary container before reporting the error.
+    delete newpsc;
+    if (nbsite < 2)
+      throw DimensionException("SequenceStatistics::pairwiseR2: less than two sites are available", nbsite, 2);
+    throw DimensionException("SequenceStatistics::pairwiseR2: less than two sequences are available", nbseq, 2);
+  }
+  Vdouble R2;
+  for (size_t i = 0; i < nbsite - 1; i++)
+  {
+    for (size_t j = i + 1; j < nbsite; j++)
+    {
+      const Site& site1 = newpsc->getSite(i);
+      const Site& site2 = newpsc->getSite(j);
+      map<int, double> freq1;
+      map<int, double> freq2;
+      SymbolListTools::getFrequencies(site1, freq1);
+      SymbolListTools::getFrequencies(site2, freq2);
+      // Fraction of sequences for which the two site values sum to 2.
+      double haplo = 0;
+      for (size_t k = 0; k < nbseq; k++)
+      {
+        if (site1.getValue(k) + site2.getValue(k) == 2)
+          haplo++;
+      }
+      haplo = haplo / static_cast<double>(nbseq);
+      const double D = haplo - freq1[1] * freq2[1];
+      const double r = (D * D) / (freq1[0] * freq1[1] * freq2[0] * freq2[1]);
+      R2.push_back(r);
+    }
+  }
+  delete newpsc;
+  return R2;
+}
+
+/***********************************/
+/* Global LD and distance measures */
+/***********************************/
+
+// Arithmetic mean of the pairwise |D| values returned by pairwiseD().
+// A DimensionException raised by pairwiseD() propagates to the caller.
+double SequenceStatistics::meanD(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble pairwiseValues = pairwiseD(psc, keepsingleton, freqmin);
+  const double average = VectorTools::mean<double, double>(pairwiseValues);
+  return average;
+}
+
+// Arithmetic mean of the pairwise |D'| values returned by pairwiseDprime().
+// A DimensionException raised by pairwiseDprime() propagates unchanged: the
+// former catch / `throw e;` only rethrew a (possibly sliced) copy of it.
+double SequenceStatistics::meanDprime(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin);
+  return VectorTools::mean<double, double>(Dprime);
+}
+
+// Arithmetic mean of the pairwise R^2 values returned by pairwiseR2().
+// A DimensionException raised by pairwiseR2() propagates unchanged: the
+// former catch / `throw e;` only rethrew a (possibly sliced) copy of it.
+double SequenceStatistics::meanR2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble R2 = SequenceStatistics::pairwiseR2(psc, keepsingleton, freqmin);
+  return VectorTools::mean<double, double>(R2);
+}
+
+// Arithmetic mean of the pairwise distances returned by pairwiseDistances1().
+// A DimensionException raised by pairwiseDistances1() propagates unchanged:
+// the former catch / `throw e;` only rethrew a (possibly sliced) copy of it.
+double SequenceStatistics::meanDistance1(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble dist = pairwiseDistances1(psc, keepsingleton, freqmin);
+  return VectorTools::mean<double, double>(dist);
+}
+
+// Arithmetic mean of the pairwise distances returned by pairwiseDistances2().
+// A DimensionException raised by pairwiseDistances2() propagates unchanged:
+// the former catch / `throw e;` only rethrew a (possibly sliced) copy of it.
+double SequenceStatistics::meanDistance2(const PolymorphismSequenceContainer& psc, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble dist = pairwiseDistances2(psc, keepsingleton, freqmin);
+  return VectorTools::mean<double, double>(dist);
+}
+
+/**********************/
+/* Regression methods */
+/**********************/
+
+/**
+ * Slope of the least-squares line through the origin fitted to (dist, D - 1):
+ * sum((D - 1) * dist) / sum(dist * dist).
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use
+ * (presumably to express them in kb -- TODO confirm).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+double SequenceStatistics::originRegressionD(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble D = pairwiseD(psc, keepsingleton, freqmin) - 1;
+  Vdouble dist;
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  return VectorTools::sum(D * dist) / VectorTools::sum(dist * dist);
+}
+
+/**
+ * Slope of the least-squares line through the origin fitted to
+ * (dist, D' - 1): sum((D' - 1) * dist) / sum(dist * dist).
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use
+ * (presumably to express them in kb -- TODO confirm).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+double SequenceStatistics::originRegressionDprime(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin) - 1;
+  Vdouble dist;
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  return VectorTools::sum(Dprime * dist) / VectorTools::sum(dist * dist);
+}
+
+/**
+ * Slope of the least-squares line through the origin fitted to
+ * (dist, R2 - 1): sum((R2 - 1) * dist) / sum(dist * dist).
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use
+ * (presumably to express them in kb -- TODO confirm).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+double SequenceStatistics::originRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin) - 1;
+  Vdouble dist;
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  return VectorTools::sum(R2 * dist) / VectorTools::sum(dist * dist);
+}
+
+/**
+ * Ordinary least-squares regression of pairwise |D| on pairwise distance.
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use.
+ *
+ * @return A vector of size 2: [0] the slope cov(dist, D) / var(dist),
+ *         [1] the intercept mean(D) - slope * mean(dist).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+Vdouble SequenceStatistics::linearRegressionD(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble D = pairwiseD(psc, keepsingleton, freqmin);
+  Vdouble dist;
+  Vdouble reg(2);
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  reg[0] = VectorTools::cov<double, double>(dist, D) / VectorTools::var<double, double>(dist);
+  reg[1] = VectorTools::mean<double, double>(D) - reg[0] * VectorTools::mean<double, double>(dist);
+  return reg;
+}
+
+/**
+ * Ordinary least-squares regression of pairwise |D'| on pairwise distance.
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use.
+ *
+ * @return A vector of size 2: [0] the slope cov(dist, D') / var(dist),
+ *         [1] the intercept mean(D') - slope * mean(dist).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+Vdouble SequenceStatistics::linearRegressionDprime(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble Dprime = pairwiseDprime(psc, keepsingleton, freqmin);
+  Vdouble dist;
+  Vdouble reg(2);
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  reg[0] = VectorTools::cov<double, double>(dist, Dprime) / VectorTools::var<double, double>(dist);
+  reg[1] = VectorTools::mean<double, double>(Dprime) - reg[0] * VectorTools::mean<double, double>(dist);
+  return reg;
+}
+
+/**
+ * Ordinary least-squares regression of pairwise R^2 on pairwise distance.
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use.
+ *
+ * @return A vector of size 2: [0] the slope cov(dist, R2) / var(dist),
+ *         [1] the intercept mean(R2) - slope * mean(dist).
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+Vdouble SequenceStatistics::linearRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin);
+  Vdouble dist;
+  Vdouble reg(2);
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  reg[0] = VectorTools::cov<double, double>(dist, R2) / VectorTools::var<double, double>(dist);
+  reg[1] = VectorTools::mean<double, double>(R2) - reg[0] * VectorTools::mean<double, double>(dist);
+  return reg;
+}
+
+/**
+ * Slope of the least-squares line through the origin fitted to
+ * (dist, 1 / R2 - 1): sum((1 / R2 - 1) * dist) / sum(dist * dist).
+ *
+ * Distances come from pairwiseDistances1() when distance1 is true and from
+ * pairwiseDistances2() otherwise, and are divided by 1000 before use.
+ * No protection against R2 values equal to zero is applied.
+ *
+ * Exceptions from the pairwise helpers propagate unchanged; the former
+ * catch / `throw e;` only rethrew a (possibly sliced) copy.
+ */
+double SequenceStatistics::inverseRegressionR2(const PolymorphismSequenceContainer& psc, bool distance1, bool keepsingleton, double freqmin) throw (DimensionException)
+{
+  Vdouble R2 = pairwiseR2(psc, keepsingleton, freqmin);
+  Vdouble unit(R2.size(), 1);
+  Vdouble R2transformed = unit / R2 - 1;
+  Vdouble dist;
+  if (distance1)
+    dist = pairwiseDistances1(psc, keepsingleton, freqmin) / 1000;
+  else
+    dist = pairwiseDistances2(psc, keepsingleton, freqmin) / 1000;
+  return VectorTools::sum(R2transformed * dist) / VectorTools::sum(dist * dist);
+}
+
+/**********************/
+/* Hudson method */
+/**********************/
+
+/**
+ * Solve rightHandHudson_(c, n) == leftHandHudson_(psc) for c by bisection
+ * on the interval [cinf, csup].
+ *
+ * @param psc       the alignment
+ * @param precision bisection stops once |2 * (c1 - c2) / (c1 + c2)| <= precision
+ * @param cinf      lower bound of the search interval
+ * @param csup      upper bound of the search interval
+ * @return -1 if psc has fewer than two polymorphic sites; cinf if the right
+ *         hand side at cinf is already below the left hand side; csup if the
+ *         right hand side at csup is still above it; otherwise the midpoint
+ *         of the final bracketing interval.
+ */
+double SequenceStatistics::hudson87(const PolymorphismSequenceContainer& psc, double precision, double cinf, double csup)
+{
+  // Cheap guard first: leftHandHudson_() builds one sub-container per pair of
+  // sequences, which is wasted work when no estimate can be returned anyway.
+  if (SequenceStatistics::polymorphicSiteNumber(psc) < 2)
+    return -1;
+  const double left = leftHandHudson_(psc);
+  const size_t n = psc.getNumberOfSequences();
+  double c1 = cinf;
+  double c2 = csup;
+  if (rightHandHudson_(c1, n) < left)
+    return cinf;
+  if (rightHandHudson_(c2, n) > left)
+    return csup;
+  double dif = 1;
+  while (dif > precision)
+  {
+    const double mid = (c1 + c2) / 2;
+    // Keep the half interval on which the right hand side still crosses `left`.
+    if (rightHandHudson_(mid, n) > left)
+      c1 = mid;
+    else
+      c2 = mid;
+    dif = std::abs(2 * (c1 - c2) / (c1 + c2));
+  }
+  return (c1 + c2) / 2;
+}
+
+/*****************/
+/* Tests methods */
+/*****************/
+
+// Write one tab-separated line to `s`: n, the eleven entries of
+// getUsefullValues_(n) in the order a1, a2, a1n, b1, b2, c1, c2, cn, dn, e1,
+// e2, then uD, vD, uD* and vD*, terminated by endl.
+void SequenceStatistics::testUsefullValues(std::ostream& s, size_t n)
+{
+  static const char* const columns[] = {"a1", "a2", "a1n", "b1", "b2", "c1", "c2", "cn", "dn", "e1", "e2"};
+  const size_t nbColumns = sizeof(columns) / sizeof(columns[0]);
+
+  map<string, double> v = getUsefullValues_(n);
+  const double vD = getVD_(n, v["a1"], v["a2"], v["cn"]);
+  const double uD = getUD_(v["a1"], vD);
+  const double vDs = getVDstar_(n, v["a1"], v["a2"], v["dn"]);
+  const double uDs = getUDstar_(n, v["a1"], vDs);
+
+  s << n << "\t";
+  for (size_t k = 0; k < nbColumns; ++k)
+  {
+    s << v[columns[k]] << "\t";
+  }
+  s << uD << "\t" << vD << "\t" << uDs << "\t" << vDs << endl;
+}
+
+// ******************************************************************************
+// Private methods
+// ******************************************************************************
+
+// Number of distinct states with a non-negative code observed at `site`,
+// minus one (0 when no such state is present).
+size_t SequenceStatistics::getMutationNumber_(const Site& site)
+{
+  map<int, size_t> countsByState;
+  SymbolListTools::getCounts(site, countsByState);
+
+  size_t nbStates = 0;
+  map<int, size_t>::const_iterator state = countsByState.begin();
+  while (state != countsByState.end())
+  {
+    // States with a negative code are not counted.
+    if (!(state->first < 0))
+      ++nbStates;
+    ++state;
+  }
+  return (nbStates == 0) ? 0 : nbStates - 1;
+}
+
+// Number of states that occur exactly once at `site` (every state reported by
+// SymbolListTools::getCounts() is considered, whatever its code).
+size_t SequenceStatistics::getSingletonNumber_(const Site& site)
+{
+  map<int, size_t> countsByState;
+  SymbolListTools::getCounts(site, countsByState);
+
+  size_t nbSingletons = 0;
+  map<int, size_t>::const_iterator state = countsByState.begin();
+  while (state != countsByState.end())
+  {
+    if (state->second == 1)
+      ++nbSingletons;
+    ++state;
+  }
+  return nbSingletons;
+}
+
+// Number of states that occur exactly once in the ingroup site and do not
+// occur in the outgroup site. Returns 0 unless the outgroup site shows
+// exactly one state, since the ancestral state cannot be recovered otherwise.
+size_t SequenceStatistics::getDerivedSingletonNumber_(const Site& site_in, const Site& site_out)
+{
+  map<int, size_t> ingroupCounts;
+  map<int, size_t> outgroupCounts;
+  SymbolListTools::getCounts(site_in, ingroupCounts);
+  SymbolListTools::getCounts(site_out, outgroupCounts);
+
+  if (outgroupCounts.size() != 1)
+    return 0;
+
+  size_t nbDerived = 0;
+  for (map<int, size_t>::const_iterator state = ingroupCounts.begin(); state != ingroupCounts.end(); ++state)
+  {
+    const bool isSingleton = (state->second == 1);
+    if (isSingleton && outgroupCounts.count(state->first) == 0)
+      ++nbDerived;
+  }
+  return nbDerived;
+}
+
+// Build the table of sample-size dependent constants keyed by name:
+// a1, a2, a1n, b1, b2, c1, c2, cn, dn, e1 and e2.
+// All eleven keys are always present; every value is 0 when n <= 1.
+// For n == 2, cn and dn are set to the fixed values 1 and 2 because their
+// general expressions divide by (n - 2).
+std::map<std::string, double> SequenceStatistics::getUsefullValues_(size_t n)
+{
+  const double nn = static_cast<double>(n);
+  double a1 = 0., a2 = 0., a1n = 0.;
+  double b1 = 0., b2 = 0.;
+  double c1 = 0., c2 = 0.;
+  double cn = 0., dn = 0.;
+  double e1 = 0., e2 = 0.;
+
+  if (n > 1)
+  {
+    // a1 = sum_{i=1}^{n-1} 1/i and a2 = sum_{i=1}^{n-1} 1/i^2
+    for (double i = 1; i < nn; i++)
+    {
+      a1 += 1. / i;
+      a2 += 1. / (i * i);
+    }
+    a1n = a1 + (1. / nn);
+    b1 = (nn + 1.) / (3. * (nn - 1.));
+    b2 = 2. * ((nn * nn) + nn + 3.) / (9. * nn * (nn - 1.));
+    c1 = b1 - (1. / a1);
+    c2 = b2 - ((nn + 2.) / (a1 * nn)) + (a2 / (a1 * a1));
+    if (n == 2)
+    {
+      cn = 1.;
+      dn = 2.;
+    }
+    else
+    {
+      cn = 2. * ((nn * a1) - (2. * (nn - 1.))) / ((nn - 1.) * (nn - 2.));
+      dn =
+        cn
+        + ((nn - 2.) / ((nn - 1.) * (nn - 1.)))
+        + (2. / (nn - 1.))
+        * ((3. / 2.) - (((2. * a1n) - 3.) / (nn - 2.)) - (1. / nn));
+    }
+    e1 = c1 / a1;
+    e2 = c2 / ((a1 * a1) + a2);
+  }
+
+  map<string, double> values;
+  values["a1"] = a1;
+  values["a2"] = a2;
+  values["a1n"] = a1n;
+  values["b1"] = b1;
+  values["b2"] = b2;
+  values["c1"] = c1;
+  values["c2"] = c2;
+  values["cn"] = cn;
+  values["dn"] = dn;
+  values["e1"] = e1;
+  values["e2"] = e2;
+  return values;
+}
+
+// vD = 1 + a1^2 / (a2 + a1^2) * (cn - (n + 1) / (n - 1)); 0 when n < 3.
+double SequenceStatistics::getVD_(size_t n, double a1, double a2, double cn)
+{
+  if (n < 3)
+    return 0.;
+  const double nn = static_cast<double>(n);
+  const double weight = (a1 * a1) / (a2 + (a1 * a1));
+  const double correction = cn - ((nn + 1.) / (nn - 1.));
+  return 1. + weight * correction;
+}
+
+// uD = a1 - 1 - vD
+double SequenceStatistics::getUD_(double a1, double vD)
+{
+  const double uD = (a1 - 1.) - vD;
+  return uD;
+}
+
+// vD* following Fu & Li 1993:
+//   ( (n/(n-1))^2 * a2 + a1^2 * dn - 2 * n * a1 * (a1 + 1) / (n-1)^2 ) / (a1^2 + a2)
+// Returns 0 when n < 3 or when the denominator a1^2 + a2 is zero.
+double SequenceStatistics::getVDstar_(size_t n, double a1, double a2, double dn)
+{
+  const double denom = (a1 * a1) + a2;
+  if (n < 3 || denom == 0.)
+    return 0.;
+
+  const double nn = static_cast<double>(n);
+  const double ratio = nn / (nn - 1.);
+  const double term1 = ratio * ratio * a2;
+  const double term2 = a1 * a1 * dn;
+  const double term3 = 2. * (nn * a1 * (a1 + 1)) / ((nn - 1.) * (nn - 1.));
+  const double vDs = ((term1 + term2) - term3) / denom;
+
+  // Alternative formulation (Simonsen et al. 1995), kept for reference only:
+  /*
+     double vDs = (
+     (values["a2"] / pow(values["a1"], 2))
+     - (2./nn) * (1. + 1./values["a1"] - values["a1"] + values["a1"]/nn)
+     - 1./(nn*nn)
+     )
+     /
+     (pow(values["a1"], 2) + values["a2"]);
+   */
+  return vDs;
+}
+
+// uD* following Fu & Li 1993: (n/(n-1)) * (a1 - n/(n-1)) - vD*; 0 when n < 3.
+double SequenceStatistics::getUDstar_(size_t n, double a1, double vDs)
+{
+  if (n < 3)
+    return 0.;
+
+  const double nn = static_cast<double>(n);
+  const double ratio = nn / (nn - 1.);
+  const double uDs = (ratio * (a1 - ratio)) - vDs;
+
+  // Alternative formulation (Simonsen et al. 1995), kept for reference only:
+  /*
+     double uDs = (((nn - 1.)/nn - 1./values["a1"]) / values["a1"]) - vDs;
+   */
+  return uDs;
+}
+
+/**
+ * Left hand side of the equation solved by hudson87().
+ *
+ * Works on the complete sites of psc. For every pair of sequences the
+ * watterson75() estimate of the two-sequence sub-alignment is accumulated
+ * (S1: sum, S2: sum of squares); these are combined with heterozygosity()
+ * and squaredHeterozygosity() of the complete-site alignment into
+ * (Sk - H + H2) / (H * n / (n - 1))^2, n being the number of sequences.
+ * No protection against H == 0 or n < 2 is applied to that final ratio.
+ */
+double SequenceStatistics::leftHandHudson_(const PolymorphismSequenceContainer& psc)
+{
+  PolymorphismSequenceContainer* newpsc = PolymorphismSequenceContainerTools::getCompleteSites(psc);
+  const size_t nbseq = newpsc->getNumberOfSequences();
+  double S1 = 0;
+  double S2 = 0;
+  // `i + 1 < nbseq` rather than `i < nbseq - 1`: the latter wraps around for
+  // an empty container because nbseq is unsigned.
+  for (size_t i = 0; i + 1 < nbseq; i++)
+  {
+    for (size_t j = i + 1; j < nbseq; j++)
+    {
+      SequenceSelection ss(2);
+      ss[0] = i;
+      ss[1] = j;
+      PolymorphismSequenceContainer* psc2 = PolymorphismSequenceContainerTools::getSelectedSequences(*newpsc, ss);
+      // Evaluate the estimator once per pair and reuse it for both sums.
+      const double theta = SequenceStatistics::watterson75(*psc2, true);
+      S1 += theta;
+      S2 += theta * theta;
+      delete psc2;
+    }
+  }
+  double Sk = (2 * S2 - pow(2 * S1 / static_cast<double>(nbseq), 2.)) / pow(nbseq, 2.);
+  double H = SequenceStatistics::heterozygosity(*newpsc);
+  double H2 = SequenceStatistics::squaredHeterozygosity(*newpsc);
+  delete newpsc;
+  return static_cast<double>(Sk - H + H2) / pow(H * static_cast<double>(nbseq) / static_cast<double>(nbseq - 1), 2.);
+}
+
+// Closed-form expression in c and n against which hudson87() compares the
+// value returned by leftHandHudson_() during its bisection.
+// NOTE(review): presumably the right hand side of the Hudson (1987) estimator
+// equation -- confirm against the paper. The expression divides by c^2 * n^3
+// and takes logarithms of c-dependent terms; no argument checking is done.
+double SequenceStatistics::rightHandHudson_(double c, size_t n)
+{
+ double nn = static_cast<double>(n);
+ return 1. / (97. * pow(c, 2.) * pow(nn, 3.)) * ((nn - 1.) * (97. * (c * (4. + (c - 2. * nn) * nn) + (-2. * (7. + c) + 4. * nn + (c - 1.) * pow(nn, 2.)) * log((18. + c * (13. + c)) / 18.)) + sqrt(97.) * (110. + nn * (49. * nn - 52.) + c * (2. + nn * (15. * nn - 8.))) * log(-1. + (72. + 26. * c) / (36. + 13. * c - c * sqrt(97.)))));
+}
+
diff --git a/src/Bpp/PopGen/SequenceStatistics.h b/src/Bpp/PopGen/SequenceStatistics.h
new file mode 100644
index 0000000..ffc279f
--- /dev/null
+++ b/src/Bpp/PopGen/SequenceStatistics.h
@@ -0,0 +1,1242 @@
+//
+// File SequenceStatistics.h
+// Authors: Eric Bazin
+// Sylvain Gaillard
+// Khalid Belkhir
+// Benoit Nabholz
+// Created on: Wed Aug 04 2004
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for population genetics analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+// Secured inclusion of header's file
+#ifndef _SEQUENCESTATISTICS_H_
+#define _SEQUENCESTATISTICS_H_
+
+// From the SeqLib library
+#include <Bpp/Seq/SymbolListTools.h>
+#include <Bpp/Seq/Alphabet/CodonAlphabet.h>
+#include <Bpp/Seq/GeneticCode/GeneticCode.h>
+#include <Bpp/Seq/Container/SiteContainerIterator.h>
+#include <Bpp/Seq/Container/SiteContainer.h>
+#include <Bpp/Seq/Container/SiteContainerTools.h>
+
+#include "PolymorphismSequenceContainer.h"
+
+// From the STL
+#include <string>
+#include <map>
+#include <vector>
+
+namespace bpp
+{
+/**
+ * @brief Static class providing methods to compute statistics on sequences data.
+ *
+ * @author Sylvain Gaillard
+ */
+class SequenceStatistics
+{
+public:
+ /**
+ * @brief Compute the number of polymorphic sites in an alignment
+ *
+ * The number of polymorphic sites is also known as the number of
+ * segregating sites @f$S@f$.
+ *
+ * Gaps are considered as mutations, so if you want the number of
+ * polymorphic sites without gaps, set the gapflag parameter to true.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to
+ * take gap into account
+ * @param ignoreUnknown a boolean set by default to true to ignore
+ * unknown states
+ */
+ static size_t polymorphicSiteNumber(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true,
+ bool ignoreUnknown = true);
+
+ /**
+ * @brief Compute the number of parsimony informative sites in an alignment
+ *
+ * @param psc a PolymorphicSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to
+ * take gap into account
+ */
+ static size_t parsimonyInformativeSiteNumber(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Count the number of singleton nucleotides in an alignment.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to
+ * take gap into account
+ * @author Sylvain Gaillard
+ */
+ static size_t countSingleton(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Count the total number of mutations in an alignment.
+ *
+ * This count is assumed to be under an infinite site model.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to
+ * take gap into account
+ * @author Sylvain Gaillard
+ */
+ static size_t totNumberMutations(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Count the total number of mutations in external branches.
+ *
+ * This is counted as the number of distinct singleton nucleotides
+ * in the ingroup that are not shared with the outgroup.
+ * A site is ignored if it contains more than one variant in the outgroup.
+ * A site is ignored if it contains unresolved variants or gaps.
+ *
+ * @param ing a PolymorphismSequenceContainer the ingroup alignment
+ * @param outg a PolymorphismSequenceContainer the outgroup alignment
+ * @throw Exception if ing and outg are not of the same size (site number)
+ * @author Khalid Belkhir
+ */
+ static size_t totMutationsExternalBranchs(
+ const PolymorphismSequenceContainer& ing,
+ const PolymorphismSequenceContainer& outg)
+ throw (Exception);
+
+ /**
+ * @brief Compute the number of triplet in an alignment
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to take gap into account
+ * @author Sylvain Glémin
+ */
+ static size_t tripletNumber(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute the sum of per site heterozygosity in an alignment
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to take gap into account
+ */
+ static double heterozygosity(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute the sum of per site squared heterozygosity in an alignment
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want
+ * to take gap into account
+ */
+ static double squaredHeterozygosity(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute the mean GC content in an alignment
+ *
+ * @param psc a PolymorphismSequenceContainer
+ */
+ static double gcContent(
+ const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Return the number of GC alleles and the total number of alleles at polymorphic sites only
+ *
+ * G vs C and A vs T polymorphism are not taken into account
+ *
+ * @warning SG 15/03/2010: The code of this method is not clear. See
+ * implementation for more details.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want
+ * to take gap into account
+ * @return A std::vector of size 2 containing the number of GC alleles
+ * and the total number of alleles.
+ */
+ static std::vector<size_t> gcPolymorphism(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute diversity estimator Theta of Watterson (1975, Theor Popul Biol, 7 pp256-276)
+ *
+ * @f[
+ * \hat{\theta}_S=\frac{S}{a_1}
+ * @f]
+ * where @f$S@f$ is the number of polymorphic sites and @f$a_1@f$ is
+ * described in SequenceStatistics::getUsefullValues_().
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gap into account
+ * @param ignoreUnknown a boolean set by default to true to ignore
+ * unknown states
+ * @author Sylvain Gaillard
+ */
+ static double watterson75(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true,
+ bool ignoreUnknown = true);
+
+ /**
+ * @brief Compute diversity estimator Theta of Tajima (1983, Genetics, 105 pp437-460)
+ *
+ * @f[
+ * \hat{\theta}_\pi=1-\sum_{i=1}^{S}\sum_{j=1}^{4}\frac{k_{j,i}\times\left(k_{j,i}-1\right)}
+ * {n_i\times\left(n_i-1\right)} \qquad \textrm{with }k_{j,i}>0
+ * @f]
+ * where @f$k_{j,i}@f$ is the count of the j<sup>th</sup> state at the
+ * i<sup>th</sup> site,
+ * @f$n_i@f$ the number of nucleotides and @f$S@f$ the number of
+ * polymorphic sites.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gap into account
+ * @author Sylvain Gaillard
+ */
+ static double tajima83(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute diversity estimator Theta H (eq. 3) of Fay and Wu (2000, Genetics, 155: 1405-1413)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param ancestralSites a Sequence containing the ancestral states
+ * (reconstructed independently) to fold the mutation in the psc SequenceContainer.
+ * @author Benoit Nabholz
+ */
+ static double FayWu2000(
+ const PolymorphismSequenceContainer& psc,
+ const Sequence& ancestralSites);
+
+ /**
+ * @brief Return the number of haplotype in the sample.
+ * Depaulis and Veuille (1998, Mol Biol Evol, 12 pp1788-1790)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gap into account
+ * @author Éric Bazin
+ * @todo
+ * - remove unneeded Sequence Container recopy
+ * - work on Sequence rather on string
+ */
+ static size_t DVK(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Return the haplotype diversity of a sample.
+ * Depaulis and Veuille (1998, Mol Biol Evol, 12 pp1788-1790)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gaps into account
+ * @author Éric Bazin
+ * @todo
+ * - remove unneeded Sequence Container recopy
+ * - work on Sequence rather on string
+ */
+ static double DVH(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Return the number of transitions.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @author Éric Bazin
+ */
+ static size_t getNumberOfTransitions(
+ const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Return the number of transversions.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @author Éric Bazin
+ */
+ static size_t getNumberOfTransversions(
+ const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief Return the ratio of transitions/transversions.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @author Éric Bazin
+ */
+ static double getTransitionsTransversionsRatio(
+ const PolymorphismSequenceContainer& psc )
+ throw (Exception);
+
+ /**
+ * @brief Compute the number of codon sites with stop codon
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag a boolean set by default to true if you don't want to
+ * take gaps into account
+ * @author Sylvain Glémin
+ */
+ static size_t stopCodonSiteNumber(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute the number of polymorphic codon with only one mutated site
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param stopflag a boolean set by default to true if you don't want
+ * to take stop codon neither undefined sites into account
+ * @param gapflag a boolean set by default to true if you don't want
+ * to take gaps into account
+ * @author Sylvain Glémin
+ * @bug Sylvain Gaillard 17/03/2010: stopflag doesn't work as expected
+ * because CompleteSiteIterator doesn't skip stop codons.
+ */
+ static size_t monoSitePolymorphicCodonNumber(
+ const PolymorphismSequenceContainer& psc,
+ bool stopflag = true,
+ bool gapflag = true);
+
+ /**
+ * @brief Compute the number of synonymous polymorphic codon sites
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @author Sylvain Glémin
+ * @author Éric Bazin
+ */
+ static size_t synonymousPolymorphicCodonNumber(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc);
+
+ /**
+ * @brief Compute the Watterson(1975,Theor Popul Biol, 7 pp256-276) estimator for synonymous positions
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * In case of complex codon, the path that gives the minimum number of
+ * non-synonymous changes is chosen. The argument minchange=true is sent
+ * to numberOfSynonymousDifferences used in this method.
+ * Otherwise, a non-integer number could be returned.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @author Sylvain Glémin
+ */
+ static double watterson75Synonymous(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc);
+
+ /**
+ * @brief Compute the Watterson(1975, Theor Popul Biol, 7 pp256-276) estimator for non synonymous positions
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * In case of complex codon, the path that gives the minimum number of
+ * non-synonymous changes is chosen. The argument minchange=true is sent
+ * to numberOfSynonymousDifferences used in this method.
+ * Otherwise, a non-integer number could be returned.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @author Sylvain Glémin
+ */
+ static double watterson75NonSynonymous(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc);
+
+ /**
+ * @brief Compute the synonymous nucleotide diversity, pi
+ *
+ * Gaps and unresolved sites are automatically excluded
+ * If minchange = false (default option) the different paths are equally
+ * weighted.
+ * If minchange = true the path with the minimum number of non-synonymous
+ * change is chosen.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param minchange a boolean set to false
+ * @author Sylvain Glémin
+ * @author Éric Bazin
+ */
+ static double piSynonymous(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ bool minchange = false);
+
+ /**
+ * @brief Compute the non-synonymous nucleotide diversity, pi
+ *
+ * Gaps and unresolved sites are automatically excluded
+ * If minchange = false (default option) the different paths are equally
+ * weighted.
+ * If minchange = true the path with the minimum number of non-synonymous
+ * change is chosen.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param minchange a boolean set by default to false
+ * @author Sylvain Glémin
+ * @author Éric Bazin
+ */
+ static double piNonSynonymous(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ bool minchange = false);
+
+ /**
+ * @brief compute the mean number of synonymous site in an alignment
+ *
+ * A site is x% synonymous if x% of possible mutations are synonymous.
+ * The transition/transversion can be taken into account (use the
+ * variable ratio).
+ * Gaps and unresolved sites are automatically excluded.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param ratio a double
+ * @author Sylvain Glémin
+ * @author Éric Bazin
+ */
+ static double meanSynonymousSitesNumber(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ double ratio = 1.);
+
+ /**
+ * @brief compute the mean number of non-synonymous site in an alignment
+ *
+ * A site is x% synonymous if x% of possible mutations are synonymous
+ * The transition/transversion can be taken into account (use the
+ * variable ratio).
+ * Gaps are automatically excluded
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param ratio a double
+ * @author Éric Bazin
+ */
+ static double meanNonSynonymousSitesNumber(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ double ratio = 1.);
+
+ /**
+ * @brief compute the number of synonymous substitutions in an alignment
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * In case of complex codon, the path that gives the minimum number of
+ * non-synonymous changes is chosen. The argument minchange=true is sent
+ * to numberOfSynonymousDifferences used in this method.
+ * Otherwise, a non-integer number could be returned.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param freqmin a double, to exclude snp in frequency strictly lower
+ * than freqmin
+ */
+ static size_t synonymousSubstitutionsNumber(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ double freqmin = 0.);
+
+ /**
+ * @brief compute the number of non-synonymous substitutions in an alignment
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * In case of complex codon, the path that gives the minimum number of
+ * non-synonymous changes is chosen. The argument minchange=true is sent
+ * to numberOfSynonymousDifferences used in this method.
+ * Otherwise, a non-integer number could be returned.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param freqmin a double, to exclude snp in frequency strictly lower
+ * than freqmin
+ */
+ static size_t nonSynonymousSubstitutionsNumber(
+ const PolymorphismSequenceContainer& psc,
+ const GeneticCode& gc,
+ double freqmin = 0.);
+
+ /**
+ * @brief compute the number of fixed differences between two alignments
+ *
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * In case of complex codon, the path that gives the minimum number of
+ * non-synonymous changes is chosen. The argument minchange=true is sent
+ * to numberOfSynonymousDifferences used in this method.
+ * Otherwise, a non-integer number could be returned.
+ * @param pscin a PolymorphismSequenceContainer
+ * @param pscout a PolymorphismSequenceContainer
+ * @param psccons a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @author Sylvain Glémin
+ * @bug Sylvain Gaillard 17.03.2010: should throw something if pscin,
+ * pscout and psccons have different length (site number).
+ */
+ static std::vector<size_t> fixedDifferences(
+ const PolymorphismSequenceContainer& pscin,
+ const PolymorphismSequenceContainer& pscout,
+ PolymorphismSequenceContainer& psccons,
+ const GeneticCode& gc);
+
+ /**
+ * @brief return a vector containing Pa, Ps, Da, Ds
+ *
+ * Gaps and unresolved sites are automatically excluded
+ * @param ingroup a PolymorphismSequenceContainer
+ * @param outgroup a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param freqmin a double, to exclude snp in frequency strictly lower
+ * than freqmin
+ * @author Sylvain Glémin
+ */
+ static std::vector<size_t> MKtable(
+ const PolymorphismSequenceContainer& ingroup,
+ const PolymorphismSequenceContainer& outgroup,
+ const GeneticCode& gc,
+ double freqmin = 0.);
+
+ /**
+ * @brief return the neutrality index NI = (Pa/Ps)/(Da/Ds) (Rand & Kann 1996, Mol. Biol. Evol. 13 pp735-748)
+ *
+ * Return -1 if Ps or Da are zero
+ * Gaps and unresolved sites are automatically excluded
+ *
+ * @param ingroup a PolymorphismSequenceContainer
+ * @param outgroup a PolymorphismSequenceContainer
+ * @param gc a GeneticCode
+ * @param freqmin a double, to exclude snp in frequency strictly lower
+ * than freqmin
+ * @author Sylvain Glémin
+ */
+ static double neutralityIndex(
+ const PolymorphismSequenceContainer& ingroup,
+ const PolymorphismSequenceContainer& outgroup,
+ const GeneticCode& gc,
+ double freqmin = 0.);
+
+ /**
+ * @brief Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595).
+ *
+ * Calculation using the number of polymorphic (segregating) sites.
+ * @f[
+ * D=\frac{\hat{\theta}_\pi-\hat{\theta}_S}{\sqrt{\textrm{V}\left(\hat{\theta}_\pi-\hat{\theta}_S\right)}}
+ * =\frac{\hat{\theta}_\pi-\hat{\theta}_S}{\sqrt{e_1S+e_2S(S-1)}}
+ * @f]
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gap into account
+ * @throw ZeroDivisionException if S == 0
+ * @author Sylvain Gaillard
+ */
+ static double tajimaDSS(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595).
+ *
+ * Calculation using the total number of mutation.
+ * @f[
+ * D=\frac{\hat{\theta}_\pi-\frac{\eta}{a_1}}{\sqrt{e_1\eta+e_2\eta(\eta-1)}}
+ * @f]
+ * @param psc a PolymorphismSequenceContainer
+ * @param gapflag flag set by default to true if you don't want to
+ * take gap into account
+ * @throw ZeroDivisionException if eta == 0
+ * @author Sylvain Gaillard
+ */
+ static double tajimaDTNM(
+ const PolymorphismSequenceContainer& psc,
+ bool gapflag = true)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Return the Fu and Li D test (Fu & Li 1993, Genetics, 133 pp693-709).
+ *
+ * @param ingroup a PolymorphismSequenceContainer
+ * @param outgroup a PolymorphismSequenceContainer
+ * @param original true: use the Fu & Li method, false: use mutations in
+ * external branch.
+ * @throw ZeroDivisionException if eta == 0
+ * @author Sylvain Gaillard
+ * @author Khalid Belkhir
+ *
+ * If one set original=false then the number of mutations will be used.
+ * If the outgroup contains more than one sequence the sites with more
+ * than one variant will not be considered for external branch mutations!
+ */
+ static double fuliD(
+ const PolymorphismSequenceContainer& ingroup,
+ const PolymorphismSequenceContainer& outgroup,
+ bool original = true)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Return the Fu and Li D<sup>*</sup> test (Fu & Li 1993, Genetics, 133 pp693-709).
+ *
+ * @param group a PolymorphismSequenceContainer
+ * @author Sylvain Gaillard
+ */
+ static double fuliDstar(
+ const PolymorphismSequenceContainer& group)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Return the Fu and Li F test (Fu & Li 1993, Genetics, 133 pp693-709).
+ *
+ * @param ingroup a PolymorphismSequenceContainer
+ * @param outgroup a PolymorphismSequenceContainer
+ * @param original true: use the Fu & Li method, false: use mutations in
+ * external branch.
+ * @author Sylvain Gaillard
+ * @author Khalid Belkhir
+ *
+ * If one set original=false then the number of mutations will be used.
+ * If the outgroup contains more than one sequence the sites with more
+ * than one variant will not be considered for external branch mutations!
+ */
+ static double fuliF(
+ const PolymorphismSequenceContainer& ingroup,
+ const PolymorphismSequenceContainer& outgroup,
+ bool original = true)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Return the Fu and Li F<sup>*</sup> test (Fu & Li 1993, Genetics, 133 pp693-709).
+ *
+ * @param group a PolymorphismSequenceContainer
+ * @author Sylvain Gaillard
+ */
+ static double fuliFstar(
+ const PolymorphismSequenceContainer& group)
+ throw (ZeroDivisionException);
+
+ /**
+ * @brief Fst of Hudson, Slatkin and Maddison
+ *
+ * Taken from eq. 3 of Hudson, Slatkin and Maddison 1992 Genetics 132:583-589
+ *
+ * @f[
+ * F_{st} = 1 - \frac{H_w}{H_b}
+ * @f]
+ * where @f$H_w@f$ is the mean number of differences between different
+ * sequences sampled from the same subpopulation, and @f$H_b@f$ is the
+ * mean number of differences between sequences sampled from the two
+ * different subpopulations sampled.
+ *
+ * @param psc a PolymorphismSequenceContainer with at least two populations
+ * @param id1 is the id of the population 1
+ * @param id2 is the id of the population 2
+ * @author Benoit Nabholz
+ */
+ static double FstHudson92(
+ const PolymorphismSequenceContainer& psc,
+ size_t id1,
+ size_t id2);
+
+
+ /**
+ * @brief generate a special PolymorphismSequenceContainer for linkage disequilibrium analysis
+ *
+ * Create a PolymorphismSequenceContainer with only polymorphic site :
+ * The value 1 is assigned to the most frequent allele, and 0 to the
+ * least frequent.
+ * This psc is needed to compute Linkage Disequilibrium
+ * Statistics.
+ * Should be used before excluding gaps, but sites with gaps are not
+ * counted as polymorphic sites.
+ * Singleton can be excluded.
+ * Polymorphic site with the lowest frequency < threshold can be excluded.
+ * Only polymorphic sites with 2 alleles are kept.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @author Sylvain Glémin
+ * @bug Sylvain Gaillard 17/03/2010: Needs cleaning and lacks tests
+ * of usability. This method assumes that psc has a DNA alphabet but doesn't
+ * check for conformity.
+ * @todo
+ * - To be moved to PolymorphismSequenceContainerTools.
+ */
+ static PolymorphismSequenceContainer* generateLDContainer(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.);
+
+ /**
+ * @brief give the vector of the pairwise distances between site positions corresponding to a LD PolymorphismSequenceContainer
+ *
+ * Assume that all sequences have the same length
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble pairwiseDistances1(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the vector of all mean pairwise distances between two sites of a LD PolymorphismSequenceContainer
+ *
+ * pairwise distances are computed for each sequence separately,
+ * excluding gaps. Then the mean is taken over all the sequences.
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble pairwiseDistances2(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the vector of all mean pairwise D value between two sites (Lewontin & Kojima 1960, Evolution 14 pp458-472)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble pairwiseD(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the vector of all mean pairwise D' value between two sites (Lewontin 1964, Genetics 49 pp49-67)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble pairwiseDprime(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the vector of all mean pairwise R² value between two sites (Hill & Robertson 1968, Theor. Appl. Genet., 38 pp226-231)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble pairwiseR2(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give mean D over all pairwise comparisons
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double meanD(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give mean D' over all pairwise comparisons
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double meanDprime(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give mean R² over all pairwise comparisons
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double meanR2(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give mean pairwise distances between sites / method 1: differences between sequences are not taken into account
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double meanDistance1(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give mean pairwise distances between sites / method 2: differences between sequences are taken into account
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double meanDistance2(
+ const PolymorphismSequenceContainer& psc,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope of the regression |D| = 1+a*distance
+ *
+ * The slope is given in |D| per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double originRegressionD(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope of the regression |D'| = 1+a*distance
+ *
+ * The slope is given in |D'| per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double originRegressionDprime(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope of the regression R² = 1+a*distance
+ *
+ * The slope is given in R² per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double originRegressionR2(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope and the origin of the regression |D| = a*distance+b
+ *
+ * The slope is given in |D| per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble linearRegressionD(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope and the origin of the regression |D'| = a*distance+b
+ *
+ * The slope is given in |D'| per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble linearRegressionDprime(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.) throw (DimensionException);
+
+ /**
+ * @brief give the slope and the origin of the regression R² = a*distance+b
+ *
+ * The slope is given in R² per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static Vdouble linearRegressionR2(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give the slope of the regression R² = 1/(1+a*distance)
+ *
+ * To fit the theoretical prediction R² = 1/(1+4Nr)
+ * The slope is given in R² per kb
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param distance1 a boolean (true to use distance1, false to use
+ * distance2, false by default)
+ * @param keepsingleton a boolean (true by default, false to exclude
+ * singleton)
+ * @param freqmin a double (to exclude sites with the lowest allele
+ * frequency less than the threshold given by freqmin, 0 by default)
+ * @throw DimensionException if the number of sites or the number of
+ * sequences is lower than 2
+ * @author Sylvain Glémin
+ */
+ static double inverseRegressionR2(
+ const PolymorphismSequenceContainer& psc,
+ bool distance1 = false,
+ bool keepsingleton = true,
+ double freqmin = 0.)
+ throw (DimensionException);
+
+ /**
+ * @brief give estimate of C=4Nr using Hudson method (Hudson 1987, Genet. Res., 50 pp245-250)
+ *
+ * @param psc a PolymorphismSequenceContainer
+ * @param precision default value = 0.000001
+ * @param cinf initial value, by default cinf=0.001
+ * @param csup initial value, by default csup = 10000
+ * @author Sylvain Glémin
+ */
+ static double hudson87(
+ const PolymorphismSequenceContainer& psc,
+ double precision = 0.000001,
+ double cinf = 0.001,
+ double csup = 10000.);
+
+ /**
+ * @brief Test useful values
+ * @param s an ostream to write the values to
+ * @param n the number of observed sequences
+ * @author Sylvain Gaillard
+ */
+ static void testUsefullValues(
+ std::ostream& s,
+ size_t n);
+
+private:
+ /**
+ * @brief Count the number of mutation for a site.
+ */
+ static size_t getMutationNumber_(
+ const Site& site);
+
+ /**
+ * @brief Count the number of singleton for a site.
+ */
+ static size_t getSingletonNumber_(
+ const Site& site);
+
+ /**
+ * @brief Count the number of derived singletons for a site.
+ *
+ * Counts the singletons of site_in (a site from an ingroup) that are
+ * not in site_out (a site from an outgroup).
+ * @author Khalid Belkhir
+ */
+ static size_t getDerivedSingletonNumber_(
+ const Site& site_in,
+ const Site& site_out);
+
+ /**
+ * @brief Get useful values for theta estimators.
+ *
+ * @param n the number of observed sequences
+ *
+ * @return A map with 11 values. Keys are a1, a2, a1n, b1, b2, c1, c2,
+ * cn, dn, e1 and e2.
+ * The values are :
+ * @f[
+ * a_1=\sum_{i=1}^{n-1}\frac{1}{i} \qquad a_2=\sum_{i=1}^{n-1}\frac{1}{i^2}
+ * @f]
+ * @f[
+ * a_{1n}=\sum_{i=1}^{n}\frac{1}{i}
+ * @f]
+ * @f[
+ * b_1=\frac{n+1}{3(n-1)} \qquad b_2=\frac{2(n^2+n+3)}{9n(n-1)}
+ * @f]
+ * @f[
+ * c_1=b_1-\frac{1}{a_1} \qquad c_2=b_2-\frac{n+2}{a_1n}+\frac{a_2}{a_1^2}
+ * @f]
+ * @f[
+ * c_n=2\frac{na_1-2(n-1)}{(n-1)(n-2)}
+ * @f]
+ * @f[
+ * d_n=c_n+\frac{n-2}{(n-1)^2}+\frac{2}{n-1}\left(\frac{3}{2}-\frac{2a_{1n}-3}{n-2}-\frac{1}{n}\right)
+ * @f]
+ * @f[
+ * e_1=\frac{c_1}{a_1} \qquad e_2=\frac{c_2}{a_1^2+a_2}
+ * @f]
+ * where @f$n@f$ is the number of observed sequences.
+ *
+ * @author Sylvain Gaillard
+ */
+ static std::map<std::string, double> getUsefullValues_(
+ size_t n);
+
+ /**
+ * @brief Get the vD value of equation (32) in (Fu & Li 1993, Genetics, 133 pp693-709)
+ *
+ * @param n the number of observed sequences
+ * @param a1 as described in getUsefullValues_
+ * @param a2 as described in getUsefullValues_
+ * @param cn as described in getUsefullValues_
+ *
+ * @return the vD value as double
+ *
+ * @author Sylvain Gaillard
+ */
+ static double getVD_(
+ size_t n,
+ double a1,
+ double a2,
+ double cn);
+
+ /**
+ * @brief Get the uD value of equation (32) in (Fu & Li 1993, Genetics, 133 pp693-709)
+ *
+ * @param a1 as described in getUsefullValues_
+ * @param vD as provided by getVD_
+ *
+ * @return the uD value as double
+ *
+ * @author Sylvain Gaillard
+ */
+ static double getUD_(
+ double a1,
+ double vD);
+
+ /**
+ * @brief Get the vD* value of D* equation in (Fu & Li 1993, Genetics, 133 pp693-709)
+ *
+ * @param n the number of observed sequences
+ * @param a1 as described in getUsefullValues_
+ * @param a2 as described in getUsefullValues_
+ * @param dn as described in getUsefullValues_
+ *
+ * @return the vD* value as double
+ *
+ * @author Sylvain Gaillard
+ */
+ static double getVDstar_(
+ size_t n,
+ double a1,
+ double a2,
+ double dn);
+
+ /**
+ * @brief Get the uD* value of D* equation in (Fu & Li 1993, Genetics, 133 pp693-709)
+ *
+ * @param n the number of observed sequences
+ * @param a1 as described in getUsefullValues_
+ * @param vDs as provided by getVDstar_
+ *
+ * @return the uD* value as double
+ *
+ * @author Sylvain Gaillard
+ */
+ static double getUDstar_(
+ size_t n,
+ double a1,
+ double vDs);
+
+ /**
+ * @brief give the left hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250)
+ * This term is used in hudson87
+ * @param psc a PolymorphismSequenceContainer
+ */
+ static double leftHandHudson_(
+ const PolymorphismSequenceContainer& psc);
+
+ /**
+ * @brief give the right hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250)
+ * This term is used in hudson87
+ */
+ static double rightHandHudson_(
+ double c,
+ size_t n);
+
+ /************************************************************************/
+};
+} // end of namespace bpp;
+
+#endif // _SEQUENCESTATISTICS_H_
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..a18aa97
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,104 @@
+# CMake script for Bio++ PopGen
+# Author: Sylvain Gaillard
+# Created: 21/08/2009
+
+# Explicit file lists: CPP_FILES holds the library sources, H_FILES the headers
+set(CPP_FILES
+ Bpp/PopGen/AbstractIDataSet.cpp
+ Bpp/PopGen/AbstractODataSet.cpp
+ Bpp/PopGen/AnalyzedLoci.cpp
+ Bpp/PopGen/AnalyzedSequences.cpp
+ Bpp/PopGen/BasicAlleleInfo.cpp
+ Bpp/PopGen/BiAlleleMonolocusGenotype.cpp
+ Bpp/PopGen/DarwinDon.cpp
+ Bpp/PopGen/DarwinVarSingle.cpp
+ Bpp/PopGen/DataSet.cpp
+ Bpp/PopGen/DataSetTools.cpp
+ Bpp/PopGen/Date.cpp
+ Bpp/PopGen/GeneMapperCsvExport.cpp
+ Bpp/PopGen/Genepop.cpp
+ Bpp/PopGen/GeneralExceptions.cpp
+ Bpp/PopGen/Genetix.cpp
+ Bpp/PopGen/Group.cpp
+ Bpp/PopGen/Individual.cpp
+ Bpp/PopGen/LocusInfo.cpp
+ Bpp/PopGen/MonoAlleleMonolocusGenotype.cpp
+ Bpp/PopGen/MonolocusGenotypeTools.cpp
+ Bpp/PopGen/MultiAlleleMonolocusGenotype.cpp
+ Bpp/PopGen/MultiSeqIndividual.cpp
+ Bpp/PopGen/MultilocusGenotype.cpp
+ Bpp/PopGen/MultilocusGenotypeStatistics.cpp
+ Bpp/PopGen/PolymorphismMultiGContainer.cpp
+ Bpp/PopGen/PolymorphismMultiGContainerTools.cpp
+ Bpp/PopGen/PolymorphismSequenceContainer.cpp
+ Bpp/PopGen/PolymorphismSequenceContainerTools.cpp
+ Bpp/PopGen/PopgenlibIO.cpp
+ Bpp/PopGen/SequenceStatistics.cpp
+ )
+set(H_FILES
+ Bpp/PopGen/AbstractIDataSet.h
+ Bpp/PopGen/AbstractODataSet.h
+ Bpp/PopGen/AlleleInfo.h
+ Bpp/PopGen/AnalyzedLoci.h
+ Bpp/PopGen/AnalyzedSequences.h
+ Bpp/PopGen/BasicAlleleInfo.h
+ Bpp/PopGen/BiAlleleMonolocusGenotype.h
+ Bpp/PopGen/DarwinDon.h
+ Bpp/PopGen/DarwinVarSingle.h
+ Bpp/PopGen/DataSet.h
+ Bpp/PopGen/DataSetTools.h
+ Bpp/PopGen/Date.h
+ Bpp/PopGen/GeneMapperCsvExport.h
+ Bpp/PopGen/Genepop.h
+ Bpp/PopGen/GeneralExceptions.h
+ Bpp/PopGen/Genetix.h
+ Bpp/PopGen/Group.h
+ Bpp/PopGen/IDataSet.h
+ Bpp/PopGen/IODataSet.h
+ Bpp/PopGen/Individual.h
+ Bpp/PopGen/Locality.h
+ Bpp/PopGen/LocusInfo.h
+ Bpp/PopGen/MonoAlleleMonolocusGenotype.h
+ Bpp/PopGen/MonolocusGenotype.h
+ Bpp/PopGen/MonolocusGenotypeTools.h
+ Bpp/PopGen/MultiAlleleMonolocusGenotype.h
+ Bpp/PopGen/MultiSeqIndividual.h
+ Bpp/PopGen/MultilocusGenotype.h
+ Bpp/PopGen/MultilocusGenotypeStatistics.h
+ Bpp/PopGen/ODataSet.h
+ Bpp/PopGen/PolymorphismMultiGContainer.h
+ Bpp/PopGen/PolymorphismMultiGContainerTools.h
+ Bpp/PopGen/PolymorphismSequenceContainer.h
+ Bpp/PopGen/PolymorphismSequenceContainerTools.h
+ Bpp/PopGen/PopgenlibIO.h
+ Bpp/PopGen/SequenceStatistics.h
+ )
+
+# Static archive target: compiled from CPP_FILES, output base name bpp-popgen
+add_library(bpppopgen-static STATIC ${CPP_FILES})
+set_target_properties(bpppopgen-static PROPERTIES
+ OUTPUT_NAME bpp-popgen
+ CLEAN_DIRECT_OUTPUT 1
+ )
+target_link_libraries(bpppopgen-static ${LIBS})
+
+# Shared library target: same sources and base name, versioned via VERSION / SOVERSION
+add_library(bpppopgen-shared SHARED ${CPP_FILES})
+set_target_properties(bpppopgen-shared PROPERTIES
+ OUTPUT_NAME bpp-popgen
+ CLEAN_DIRECT_OUTPUT 1
+ VERSION ${BPPPOPGEN_VERSION}
+ SOVERSION ${BPPPOPGEN_VERSION_MAJOR}
+ )
+target_link_libraries(bpppopgen-shared ${LIBS})
+
+# Both library flavours are installed into lib${LIB_SUFFIX}
+install(TARGETS bpppopgen-static bpppopgen-shared
+ DESTINATION lib${LIB_SUFFIX})
+
+# Copy the Bpp/ header tree to include/Bpp, keeping only *.h files
+install(DIRECTORY Bpp/ DESTINATION include/Bpp FILES_MATCHING PATTERN "*.h")
+
+# Generate generic include files (.all) for the installed headers; the target directory is resolved at install time from DESTDIR and CMAKE_INSTALL_PREFIX (CMAKE_PREFIX_PATH is a find-search path, empty by default)
+install(CODE "execute_process(COMMAND \"${CMAKE_SOURCE_DIR}/genIncludes.sh\" \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}/include/Bpp\")")
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbpp-popgen.git
More information about the debian-med-commit
mailing list