[med-svn] [libbpp-seq] 01/06: Imported Upstream version 2.2.0
Andreas Tille
tille at debian.org
Sat Apr 9 06:28:58 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libbpp-seq.
commit 849099131f16a8ddc8dc0a6ff0ce8b9534d36f74
Author: Andreas Tille <tille at debian.org>
Date: Sat Apr 9 08:15:07 2016 +0200
Imported Upstream version 2.2.0
---
CMakeLists.txt | 13 +-
COPYING.txt | 845 +++++++++-------
ChangeLog | 11 +
Doxyfile | 6 +-
Doxyfile => Doxyfile-build | 7 +-
bpp-seq.spec | 5 +-
debian/changelog | 7 +
debian/control | 8 +-
debian/copyright | 6 +-
debian/postinst | 22 +-
debian/postrm | 26 +-
debian/prerm | 22 +-
debian/rules | 4 +-
debian/postinst => genIncludes.sh | 10 +-
src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp | 63 +-
src/Bpp/Seq/Alphabet/AbstractAlphabet.h | 110 +-
src/Bpp/Seq/Alphabet/Alphabet.h | 75 +-
.../AlphabetNumericState.h} | 100 +-
src/Bpp/Seq/Alphabet/AlphabetTools.cpp | 2 +-
src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp | 11 +-
src/Bpp/Seq/Alphabet/BinaryAlphabet.h | 22 +-
src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp | 6 +-
src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h | 79 +-
src/Bpp/Seq/Alphabet/CodonAlphabet.cpp | 12 +-
src/Bpp/Seq/Alphabet/CodonAlphabet.h | 99 +-
src/Bpp/Seq/Alphabet/DNA.cpp | 47 +-
src/Bpp/Seq/Alphabet/DNA.h | 90 +-
src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp | 13 +-
src/Bpp/Seq/Alphabet/DefaultAlphabet.h | 108 +-
.../EchinodermMitochondrialCodonAlphabet.cpp | 75 --
.../EchinodermMitochondrialCodonAlphabet.h | 72 --
.../{BinaryAlphabet.cpp => IntegerAlphabet.cpp} | 16 +-
.../{BinaryAlphabet.h => IntegerAlphabet.h} | 54 +-
.../InvertebrateMitochondrialCodonAlphabet.cpp | 74 --
.../InvertebrateMitochondrialCodonAlphabet.h | 75 --
...{StandardCodonAlphabet.h => LetterAlphabet.cpp} | 44 +-
src/Bpp/Seq/Alphabet/LetterAlphabet.h | 157 +--
src/Bpp/Seq/Alphabet/NucleicAlphabet.h | 164 +--
src/Bpp/Seq/Alphabet/NumericAlphabet.cpp | 189 ++++
src/Bpp/Seq/Alphabet/NumericAlphabet.h | 131 +++
src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp | 271 ++---
src/Bpp/Seq/Alphabet/ProteicAlphabet.h | 185 ++--
src/Bpp/Seq/Alphabet/RNA.cpp | 47 +-
src/Bpp/Seq/Alphabet/RNA.h | 90 +-
src/Bpp/Seq/Alphabet/RNY.cpp | 1048 ++++++++++----------
src/Bpp/Seq/Alphabet/RNY.h | 15 +
src/Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp | 75 --
.../VertebrateMitochondrialCodonAlphabet.cpp | 76 --
.../VertebrateMitochondrialCodonAlphabet.h | 71 --
src/Bpp/Seq/Alphabet/WordAlphabet.cpp | 60 +-
src/Bpp/Seq/Alphabet/WordAlphabet.h | 21 +-
.../Alphabet/YeastMitochondrialCodonAlphabet.cpp | 72 --
.../Seq/Alphabet/YeastMitochondrialCodonAlphabet.h | 75 --
src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h | 4 +-
.../AAChenGuHuangHydrophobicityIndex.h | 4 +-
.../Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h | 4 +-
.../Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h | 4 +-
src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h | 4 +-
src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp | 2 +-
src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp | 13 +-
src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h | 21 +-
src/Bpp/Seq/AlphabetIndex/AAMassIndex.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h | 6 +-
src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h | 4 +-
src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h | 2 +
src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h | 11 +-
src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp | 14 +-
src/Bpp/Seq/AlphabetIndex/BLOSUM50.h | 1 +
.../Seq/AlphabetIndex/DefaultNucleotideScore.cpp | 10 +-
src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h | 1 +
.../AlphabetIndex/GranthamAAChemicalDistance.cpp | 20 +-
.../Seq/AlphabetIndex/GranthamAAChemicalDistance.h | 2 +-
.../Seq/AlphabetIndex/GranthamAAPolarityIndex.h | 6 +-
src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h | 6 +-
src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h | 6 +-
.../Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp | 14 +-
.../Seq/AlphabetIndex/MiyataAAChemicalDistance.h | 6 +-
src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h | 8 +-
src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp | 20 +-
src/Bpp/Seq/AlphabetIndex/SimpleScore.h | 1 +
.../{AAIndex1Entry.h => UserAlphabetIndex1.h} | 87 +-
src/Bpp/Seq/App/SequenceApplicationTools.cpp | 176 ++--
src/Bpp/Seq/App/SequenceApplicationTools.h | 439 ++++----
src/Bpp/Seq/CodonSiteTools.cpp | 239 +++--
src/Bpp/Seq/CodonSiteTools.h | 54 +-
src/Bpp/Seq/Container/AlignedSequenceContainer.cpp | 22 +-
src/Bpp/Seq/Container/AlignedSequenceContainer.h | 1 +
.../Container/CompressedVectorSiteContainer.cpp | 10 +-
src/Bpp/Seq/Container/MapSequenceContainer.cpp | 4 +-
src/Bpp/Seq/Container/OrderedSequenceContainer.h | 4 +
.../Seq/Container/SequenceContainerIterator.cpp | 2 +-
src/Bpp/Seq/Container/SequenceContainerIterator.h | 2 +-
src/Bpp/Seq/Container/SequenceContainerTools.cpp | 5 +-
src/Bpp/Seq/Container/SequenceContainerTools.h | 4 +-
src/Bpp/Seq/Container/SiteContainerIterator.cpp | 22 +-
src/Bpp/Seq/Container/SiteContainerTools.cpp | 42 +-
src/Bpp/Seq/Container/SiteContainerTools.h | 269 ++---
src/Bpp/Seq/Container/VectorSequenceContainer.cpp | 8 +-
src/Bpp/Seq/Container/VectorSequenceContainer.h | 350 +++----
src/Bpp/Seq/Container/VectorSiteContainer.cpp | 30 +-
src/Bpp/Seq/DistanceMatrix.h | 45 +-
.../AscidianMitochondrialGeneticCode.cpp | 117 +++
.../GeneticCode/AscidianMitochondrialGeneticCode.h | 113 +++
.../EchinodermMitochondrialGeneticCode.cpp | 187 ++--
.../EchinodermMitochondrialGeneticCode.h | 62 +-
src/Bpp/Seq/GeneticCode/GeneticCode.cpp | 53 +-
src/Bpp/Seq/GeneticCode/GeneticCode.h | 136 ++-
.../InvertebrateMitochondrialGeneticCode.cpp | 189 ++--
.../InvertebrateMitochondrialGeneticCode.h | 60 +-
.../GeneticCode/MoldMitochondrialGeneticCode.cpp | 117 +++
.../Seq/GeneticCode/MoldMitochondrialGeneticCode.h | 111 +++
src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp | 189 ++--
src/Bpp/Seq/GeneticCode/StandardGeneticCode.h | 62 +-
.../VertebrateMitochondrialGeneticCode.cpp | 191 ++--
.../VertebrateMitochondrialGeneticCode.h | 64 +-
.../GeneticCode/YeastMitochondrialGeneticCode.cpp | 189 ++--
.../GeneticCode/YeastMitochondrialGeneticCode.h | 59 +-
src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp | 57 +-
src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h | 7 +-
src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp | 57 +-
src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h | 9 +-
src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp | 1 -
src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp | 9 +-
src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp | 59 +-
src/Bpp/Seq/Io/BppOSequenceReaderFormat.h | 8 +-
src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h | 5 +-
src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp | 2 +-
src/Bpp/Seq/Io/BppOSequenceWriterFormat.h | 7 +-
src/Bpp/Seq/Io/Dcse.cpp | 4 +-
src/Bpp/Seq/Io/Fasta.cpp | 9 +-
src/Bpp/Seq/Io/Mase.cpp | 2 +-
src/Bpp/Seq/Io/MaseTools.cpp | 68 +-
src/Bpp/Seq/Io/MaseTools.h | 98 +-
src/Bpp/Seq/Io/Phylip.cpp | 26 +-
src/Bpp/Seq/Io/Phylip.h | 4 +-
src/Bpp/Seq/Sequence.cpp | 2 +-
src/Bpp/Seq/SequencePositionIterators.cpp | 35 +-
src/Bpp/Seq/SequenceTools.cpp | 64 +-
src/Bpp/Seq/SequenceTools.h | 33 +-
src/Bpp/Seq/SequenceWithAnnotation.cpp | 2 +-
src/Bpp/Seq/SequenceWithAnnotationTools.cpp | 6 +-
src/Bpp/Seq/SequenceWithAnnotationTools.h | 6 +-
src/Bpp/Seq/SequenceWithQuality.cpp | 8 +-
src/Bpp/Seq/SequenceWithQuality.h | 9 +-
src/Bpp/Seq/SiteTools.cpp | 18 +-
src/Bpp/Seq/SiteTools.h | 5 -
src/Bpp/Seq/StringSequenceTools.cpp | 16 +-
src/Bpp/Seq/StringSequenceTools.h | 6 +-
src/Bpp/Seq/SymbolList.cpp | 16 +-
src/CMakeLists.txt | 19 +-
test/test_alphabets.cpp | 18 +-
test/test_bowker.cpp | 2 +-
test/test_walker.cpp | 4 +-
157 files changed, 5246 insertions(+), 4379 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bf03ce6..0c66d62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,13 +63,13 @@ MACRO(IMPROVED_FIND_LIBRARY OUTPUT_LIBS lib_name include_to_find)
SET(${lib_name}_NAMES ${lib_name} ${lib_name}lib ${lib_name}dll)
FIND_LIBRARY(${lib_name}_LIBRARY NAMES ${${lib_name}_NAMES} PATH_SUFFIXES lib${LIB_SUFFIX})
- IF(${lib_name}_LIBRARY)
+ IF(${${lib_name}_INCLUDE_DIR} MATCHES ${lib_name}_INCLUDE_DIR-NOTFOUND)
+ MESSAGE(FATAL_ERROR "${lib_name} required but not found.")
+ ELSE(${${lib_name}_INCLUDE_DIR} MATCHES ${lib_name}_INCLUDE_DIR-NOTFOUND)
MESSAGE("-- Library ${lib_name} found here:")
MESSAGE(" includes : ${${lib_name}_INCLUDE_DIR}")
MESSAGE(" libraries: ${${lib_name}_LIBRARY}")
- ELSE(${lib_name}_LIBRARY)
- MESSAGE(FATAL_ERROR "${lib_name} required but not found.")
- ENDIF(${lib_name}_LIBRARY)
+ ENDIF(${${lib_name}_INCLUDE_DIR} MATCHES ${lib_name}_INCLUDE_DIR-NOTFOUND)
#add the dependency:
INCLUDE_DIRECTORIES(${${lib_name}_INCLUDE_DIR})
@@ -101,9 +101,9 @@ ENDIF(NO_DEP_CHECK)
# Packager
SET(CPACK_PACKAGE_NAME "libbpp-seq")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "2.1.0")
+SET(CPACK_PACKAGE_VERSION "2.2.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "2")
-SET(CPACK_PACKAGE_VERSION_MINOR "1")
+SET(CPACK_PACKAGE_VERSION_MINOR "2")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Sequence library")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
@@ -121,7 +121,6 @@ SET(CPACK_SOURCE_IGNORE_FILES
".*\\\\.deb"
".*\\\\.rpm"
".*\\\\.dmg"
- ".*\\\\.sh"
".*\\\\..*\\\\.swp"
"src/\\\\..*"
"src/libbpp*"
diff --git a/COPYING.txt b/COPYING.txt
index 623b625..7e84b53 100644
--- a/COPYING.txt
+++ b/COPYING.txt
@@ -1,340 +1,505 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- GNU GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
- 1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
- 2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
- 5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
- 6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
- 9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
- 10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) year name of author
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- <signature of Ty Coon>, 1 April 1989
- Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
-Public License instead of this License.
+
+ CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[logiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial establishment, having its principal place of business at
+31-33 rue de la Fédération, 75752 Paris cedex 15, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+Software's suitability as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting Software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the Modified Software become subject
+to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full Source Code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the Source Code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by the current license, for the duration set forth in article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+A Licensee who develops a Contribution is the owner of the intellectual
+property rights over this Contribution as defined by applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+A Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the product's suitability for its requirements, its good working order,
+and for ensuring that it shall not cause damage to either persons or
+properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any Failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2005-05-21.
diff --git a/ChangeLog b/ChangeLog
index 433ca47..de92f4c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+23/09/14 -*- Version 2.2.0 -*-
+
+23/09/14 Julien Dutheil
+* Fixed bug #73
+
+16/09/14 Julien Dutheil
+* Fixed bug #79
+
+24/07/14 Julien Dutheil
+* Alphabet code gets a refreshment!
+
05/03/13 -*- Version 2.1.0 -*-
07/02/13 Julien Dutheil
diff --git a/Doxyfile b/Doxyfile
index 86feefb..eab3c61 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -32,7 +32,7 @@ PROJECT_NAME = bpp-seq
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 2.1.0
+PROJECT_NUMBER = 2.2.0
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer
@@ -343,7 +343,7 @@ TYPEDEF_HIDES_STRUCT = NO
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
# corresponding to a cache size of 2^16 = 65536 symbols.
-SYMBOL_CACHE_SIZE = 0
+# SYMBOL_CACHE_SIZE = 0
# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
@@ -1712,7 +1712,7 @@ DOT_NUM_THREADS = 0
# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
# directory containing the font.
-DOT_FONTNAME = FreeSans
+# DOT_FONTNAME = FreeSans
# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
# The default size is 10pt.
diff --git a/Doxyfile b/Doxyfile-build
similarity index 99%
copy from Doxyfile
copy to Doxyfile-build
index 86feefb..71e9c08 100644
--- a/Doxyfile
+++ b/Doxyfile-build
@@ -32,7 +32,7 @@ PROJECT_NAME = bpp-seq
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 2.1.0
+PROJECT_NUMBER = 2.2.0
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer
@@ -343,7 +343,7 @@ TYPEDEF_HIDES_STRUCT = NO
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
# corresponding to a cache size of 2^16 = 65536 symbols.
-SYMBOL_CACHE_SIZE = 0
+# SYMBOL_CACHE_SIZE = 0
# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
@@ -1712,7 +1712,7 @@ DOT_NUM_THREADS = 0
# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
# directory containing the font.
-DOT_FONTNAME = FreeSans
+# DOT_FONTNAME = FreeSans
# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
# The default size is 10pt.
@@ -1886,3 +1886,4 @@ GENERATE_LEGEND = YES
# the various graphs.
DOT_CLEANUP = YES
+OUTPUT_DIRECTORY=/home/jdutheil/Devel/Cpp/Projects/bpp-seq
diff --git a/bpp-seq.spec b/bpp-seq.spec
index c8eb8fa..8c57200 100644
--- a/bpp-seq.spec
+++ b/bpp-seq.spec
@@ -1,5 +1,5 @@
%define _basename bpp-seq
-%define _version 2.1.0
+%define _version 2.2.0
%define _release 1
%define _prefix /usr
@@ -173,6 +173,9 @@ exit 0
%{_prefix}/include/*
%changelog
+* Tue Sep 23 2014 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.2.0-1
+- Alphabet classes refreshed and updated
+- Bug fixes.
* Tue Mar 05 2013 Julien Dutheil <julien.dutheil at univ-montp2.fr> 2.1.0-1
- 'omics' tools now in bpp-seq-omics
- Extended BppO support
diff --git a/debian/changelog b/debian/changelog
index 660977c..db8e565 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+libbpp-seq (2.2.0-1) unstable; urgency=low
+
+ * Alphabet classes refreshed and updated
+ * Bug fixes.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Tue, 23 Sep 2014 14:00:00 +0100
+
libbpp-seq (2.1.0-1) unstable; urgency=low
* 'omics' tools now in bpp-seq-omics
diff --git a/debian/control b/debian/control
index 8e2765b..02efa76 100644
--- a/debian/control
+++ b/debian/control
@@ -4,14 +4,14 @@ Priority: optional
Maintainer: Loic Dachary <loic at dachary.org>
Uploaders: Julien Dutheil <julien.dutheil at univ-montp2.fr>
Build-Depends: debhelper (>= 5), cmake (>= 2.6),
- libbpp-core-dev (>= 2.1.0)
-Standards-Version: 3.9.1
+ libbpp-core-dev (>= 2.2.0)
+Standards-Version: 3.9.4
Package: libbpp-seq-dev
Section: libdevel
Architecture: any
Depends: libbpp-seq9 (= ${binary:Version}), ${misc:Depends},
- libbpp-core-dev (>= 2.1.0)
+ libbpp-core-dev (>= 2.2.0)
Description: Bio++ Sequence library development files.
Contains the Bio++ classes for sequence analysis.
@@ -19,7 +19,7 @@ Package: libbpp-seq9
Section: libs
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends},
- libbpp-core2 (>= 2.1.0)
+ libbpp-core2 (>= 2.2.0)
Description: Bio++ Sequence library.
Contains the Bio++ classes for sequence analysis.
diff --git a/debian/copyright b/debian/copyright
index e589092..1f950f5 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,5 +1,5 @@
This package was debianized by Julien Dutheil <julien.dutheil at univ-montp2.fr> on
-Tue, 05 Mar 2013 14:34:00 +0100.
+Tue, 23 Sep 2014 14:00:00 +0100.
It was downloaded from <http://biopp.univ-montp2.fr/Repositories/sources>
@@ -9,7 +9,7 @@ Upstream Author:
Copyright:
- Copyright (C) 2013 Bio++ Development Team
+ Copyright (C) 2014 Bio++ Development Team
License:
@@ -30,7 +30,7 @@ License:
On Debian systems, the complete text of the GNU General
Public License can be found in `/usr/share/common-licenses/GPL'.
-The Debian packaging is (C) 2013, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
+The Debian packaging is (C) 2014, Julien Dutheil <julien.dutheil at univ-montp2.fr> and
is licensed under the GPL, see above.
The provided software is distributed under the CeCILL license:
diff --git a/debian/postinst b/debian/postinst
index cf9e925..cff89b1 100755
--- a/debian/postinst
+++ b/debian/postinst
@@ -35,9 +35,23 @@ createGeneric() {
done;
}
-if [ "$1" = "configure" ]; then
- # Actualize .all files
- createGeneric /usr/include/Bpp
-fi
+case "$1" in
+  configure)
+    # Package configured: regenerate the generic .all include files.
+    createGeneric /usr/include/Bpp
+    ;;
+  abort-upgrade|abort-remove|abort-deconfigure)
+    # Error-unwind invocations: nothing to regenerate, just report the action.
+    echo "$1"
+    ;;
+  *)
+    # The dollar sign must not be escaped here, otherwise the literal text
+    # "$1" is printed instead of the unexpected argument.
+    echo "postinst called with unknown argument \`$1'" >&2
+    exit 0
+    ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
exit 0
diff --git a/debian/postrm b/debian/postrm
index 3931669..744f8b1 100755
--- a/debian/postrm
+++ b/debian/postrm
@@ -35,11 +35,25 @@ createGeneric() {
done;
}
-if [ "$1" = "remove" ]; then
- # Automatically added by dh_makeshlibs
- ldconfig
- # Actualize .all files
- createGeneric /usr/include/Bpp
-fi
+case "$1" in
+  remove)
+    # Automatically added by dh_makeshlibs
+    ldconfig
+    # Package removed: regenerate the generic .all include files.
+    createGeneric /usr/include/Bpp
+    ;;
+  purge|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
+    # Nothing to do for these actions; just report which one was requested.
+    # Quoted so the argument is echoed verbatim (no word splitting/globbing).
+    echo "$1"
+    ;;
+  *)
+    # The dollar sign must not be escaped here, otherwise the literal text
+    # "$1" is printed instead of the unexpected argument.
+    echo "postrm called with unknown argument \`$1'" >&2
+    exit 0
+    ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
exit 0
diff --git a/debian/prerm b/debian/prerm
index 5aefd24..8fab52e 100755
--- a/debian/prerm
+++ b/debian/prerm
@@ -19,9 +19,23 @@ removeGeneric() {
done
}
-if [ "$1" = "remove" ]; then
- # Actualize .all files
- removeGeneric /usr/include/Bpp
-fi
+case "$1" in
+ remove|upgrade|deconfigure)
+ # Actualize .all files
+ removeGeneric /usr/include/Bpp
+ ;;
+ failed-upgrade)
+ echo "$1"
+ ;;
+ *)
+ echo "prerm called with unknown argument \`$1'" >&2
+ exit 1
+ ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
exit 0
diff --git a/debian/rules b/debian/rules
index 92c389e..34313ef 100755
--- a/debian/rules
+++ b/debian/rules
@@ -38,7 +38,9 @@ configure:
config.status: configure
dh_testdir
-build: build-stamp
+build: build-arch build-indep
+build-arch: build-stamp
+build-indep: build-stamp
build-stamp: config.status
dh_testdir
diff --git a/debian/postinst b/genIncludes.sh
similarity index 80%
copy from debian/postinst
copy to genIncludes.sh
index cf9e925..56710e9 100755
--- a/debian/postinst
+++ b/genIncludes.sh
@@ -1,8 +1,5 @@
#! /bin/bash
-# Abort if any command returns an error value
-set -e
-
createGeneric() {
echo "-- Creating generic include file: $1.all"
#Make sure we run into subdirectories first:
@@ -35,9 +32,4 @@ createGeneric() {
done;
}
-if [ "$1" = "configure" ]; then
- # Actualize .all files
- createGeneric /usr/include/Bpp
-fi
-
-exit 0
+createGeneric $1
diff --git a/src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp b/src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp
index 1c970bd..79a1d73 100644
--- a/src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/AbstractAlphabet.cpp
@@ -48,6 +48,7 @@ using namespace bpp;
// From the STL:
#include <ctype.h>
#include <map>
+#include <iostream>
using namespace std;
@@ -56,32 +57,36 @@ using namespace std;
void AbstractAlphabet::updateMaps_(size_t pos, const AlphabetState& st) {
if (letters_.find(st.getLetter()) == letters_.end())
letters_[st.getLetter()] = pos;
+ else
+ throw Exception("AbstractAlphabet::updateMaps_. A state with the same character code already exists! " + st.getLetter() + ".");
if (nums_.find(st.getNum()) == nums_.end())
nums_[st.getNum()] = pos;
+ else
+ nums_[st.getNum()] = min(pos, nums_[st.getNum()]);
}
/******************************************************************************/
-void AbstractAlphabet::registerState(const AlphabetState& st) {
+void AbstractAlphabet::registerState(AlphabetState* st) throw (Exception) {
// Add the state to the vector
- alphabet_.push_back(st.clone());
+ alphabet_.push_back(st);
// Update the maps
- updateMaps_(alphabet_.size(), st);
+ updateMaps_(alphabet_.size() - 1, *st);
}
/******************************************************************************/
-void AbstractAlphabet::setState(size_t pos, const AlphabetState& st)
- throw (IndexOutOfBoundsException) {
+void AbstractAlphabet::setState(size_t pos, AlphabetState* st)
+ throw (Exception, IndexOutOfBoundsException) {
if (pos > alphabet_.size())
throw IndexOutOfBoundsException("AbstractAlphabet::setState: incorect position", pos, 0, alphabet_.size());
// Delete the state if not empty
if (alphabet_[pos] != 0)
delete alphabet_[pos];
// Put the state in the vector
- alphabet_[pos] = st.clone();
+ alphabet_[pos] = st;
// Update the maps
- updateMaps_(pos, st);
+ updateMaps_(pos, *st);
}
/******************************************************************************/
@@ -95,10 +100,46 @@ const AlphabetState& AbstractAlphabet::getState(const std::string& letter) const
/******************************************************************************/
+size_t AbstractAlphabet::getStateIndex(const std::string& letter) const throw (BadCharException) {
+ map<string, size_t>::const_iterator it = letters_.find(letter);
+ if (it == letters_.end())
+ throw BadCharException(letter, "AbstractAlphabet::getStateIndex(string): Specified base unknown", this);
+ return it->second;
+}
+
+/******************************************************************************/
+
const AlphabetState& AbstractAlphabet::getState(int num) const throw (BadIntException) {
map<int, size_t>::const_iterator it = nums_.find(num);
if (it == nums_.end())
throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
+ return *(alphabet_[it->second]);
+}
+
+/******************************************************************************/
+
+size_t AbstractAlphabet::getStateIndex(int num) const throw (BadIntException) {
+ map<int, size_t>::const_iterator it = nums_.find(num);
+ if (it == nums_.end())
+ throw BadIntException(num, "AbstractAlphabet::getStateIndex(int): Specified base unknown", this);
+ return it->second;
+}
+
+/******************************************************************************/
+
+AlphabetState& AbstractAlphabet::getState(const std::string& letter) throw (BadCharException) {
+ map<string, size_t>::iterator it = letters_.find(letter);
+ if (it == letters_.end())
+ throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this);
+ return * (alphabet_[it->second]);
+}
+
+/******************************************************************************/
+
+AlphabetState& AbstractAlphabet::getState(int num) throw (BadIntException) {
+ map<int, size_t>::iterator it = nums_.find(num);
+ if (it == nums_.end())
+ throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
return * (alphabet_[it->second]);
}
@@ -242,11 +283,11 @@ std::string AbstractAlphabet::getGeneric(const std::vector<std::string>& states)
const std::vector<int>& AbstractAlphabet::getSupportedInts() const
{
- if(intList_.size() == 0)
+ if(intList_.size() != alphabet_.size())
{
intList_.resize(alphabet_.size());
charList_.resize(alphabet_.size());
- for(unsigned int i = 0; i < alphabet_.size(); i++)
+ for (size_t i = 0; i < alphabet_.size(); ++i)
{
intList_[i] = alphabet_[i]->getNum();
charList_[i] = alphabet_[i]->getLetter();
@@ -259,11 +300,11 @@ const std::vector<int>& AbstractAlphabet::getSupportedInts() const
const std::vector<std::string>& AbstractAlphabet::getSupportedChars() const
{
- if(charList_.size() == 0)
+ if(charList_.size() != alphabet_.size())
{
intList_.resize(alphabet_.size());
charList_.resize(alphabet_.size());
- for(unsigned int i = 0; i < alphabet_.size(); i++)
+ for (size_t i = 0; i < alphabet_.size(); ++i)
{
intList_[i] = alphabet_[i]->getNum();
charList_[i] = alphabet_[i]->getLetter();
diff --git a/src/Bpp/Seq/Alphabet/AbstractAlphabet.h b/src/Bpp/Seq/Alphabet/AbstractAlphabet.h
index f32ab45..8a0a8ed 100644
--- a/src/Bpp/Seq/Alphabet/AbstractAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/AbstractAlphabet.h
@@ -104,9 +104,35 @@ namespace bpp
AbstractAlphabet(): alphabet_(), letters_(), nums_(), charList_(), intList_() {}
+ AbstractAlphabet(const AbstractAlphabet& alph) : alphabet_(), letters_(alph.letters_), nums_(alph.nums_), charList_(alph.charList_), intList_(alph.intList_)
+ {
+ for (size_t i = 0; i < alph.alphabet_.size(); ++i)
+ alphabet_.push_back(new AlphabetState(*alph.alphabet_[i]));
+ }
+
+ /**
+  * @brief Assignment operator.
+  *
+  * Releases the states currently owned by this alphabet, then stores a copy
+  * of every state of alph and copies its lookup maps and cached lists.
+  *
+  * @param alph The alphabet to copy from.
+  * @return A reference to this alphabet.
+  */
+ AbstractAlphabet& operator=(const AbstractAlphabet& alph)
+ {
+   // Self-assignment would delete the very states we are about to copy.
+   if (this == &alph)
+     return *this;
+
+   for (size_t i = 0 ; i < alphabet_.size() ; ++i)
+     delete alphabet_[i];
+   // Drop the now-dangling pointers; otherwise the copies below are appended
+   // after them and the destructor deletes the old states a second time.
+   alphabet_.clear();
+
+   // NOTE(review): copying through AlphabetState's copy constructor may slice
+   // states of a derived type -- confirm whether a polymorphic copy is needed.
+   for (size_t i = 0; i < alph.alphabet_.size(); ++i)
+     alphabet_.push_back(new AlphabetState(*alph.alphabet_[i]));
+
+   letters_ = alph.letters_;
+   nums_ = alph.nums_;
+   charList_ = alph.charList_;
+   intList_ = alph.intList_;
+
+   return *this;
+ }
+
+#ifndef NO_VIRTUAL_COV
+ virtual AbstractAlphabet* clone() const = 0;
+#endif
+
virtual ~AbstractAlphabet()
{
- for (unsigned int i = 0 ; i < alphabet_.size() ; i++)
+ for (size_t i = 0 ; i < alphabet_.size() ; ++i)
delete alphabet_[i];
}
@@ -116,6 +142,7 @@ namespace bpp
*
* @{
*/
+ size_t getNumberOfStates() const { return alphabet_.size(); }
unsigned int getNumberOfChars() const { return static_cast<unsigned int>(alphabet_.size()); }
std::string getName(const std::string& state) const throw (BadCharException);
std::string getName(int state) const throw (BadIntException);
@@ -139,6 +166,28 @@ namespace bpp
* @{
*/
/**
+ * @brief Get a state at a position in the alphabet_ vector.
+ *
+ * This method must be overloaded in specialized classes to send back
+ * a reference of the correct type.
+ *
+ * @param stateIndex The index of the state in the alphabet_ vector.
+ * @throw IndexOutOfBoundsException If the index is invalid.
+ */
+ virtual AlphabetState& getStateAt(size_t stateIndex) throw (IndexOutOfBoundsException);
+
+ /**
+ * @brief Get a state at a position in the alphabet_ vector.
+ *
+ * This method must be overloaded in specialized classes to send back
+ * a reference of the correct type.
+ *
+ * @param stateIndex The index of the state in the alphabet_ vector.
+ * @throw IndexOutOfBoundsException If the index is invalid.
+ */
+ virtual const AlphabetState& getStateAt(size_t stateIndex) const throw (IndexOutOfBoundsException);
+
+ /**
* @brief Get a state by its letter.
*
* This method must be overloaded in specialized classes to send back
@@ -148,6 +197,9 @@ namespace bpp
* @throw BadCharException If the letter is not in the Alphabet.
*/
const AlphabetState& getState(const std::string& letter) const throw (BadCharException);
+
+ AlphabetState& getState(const std::string& letter) throw (BadCharException);
+
/**
* @brief Get a state by its num.
*
@@ -158,6 +210,20 @@ namespace bpp
* @throw BadIntException If the num is not in the Alphabet.
*/
const AlphabetState& getState(int num) const throw (BadIntException);
+
+ AlphabetState& getState(int num) throw (BadIntException);
+
+ int getIntCodeAt(size_t stateIndex) const throw (IndexOutOfBoundsException) {
+ return getStateAt(stateIndex).getNum();
+ }
+
+ const std::string& getCharCodeAt(size_t stateIndex) const throw (IndexOutOfBoundsException) {
+ return getStateAt(stateIndex).getLetter();
+ }
+
+ size_t getStateIndex(int state) const throw (BadIntException);
+
+ size_t getStateIndex(const std::string& state) const throw (BadCharException);
/** @} */
protected:
@@ -165,53 +231,43 @@ namespace bpp
* @brief Add a state to the Alphabet.
*
* @param st The state to add.
+ * @throw Exception If a wrong alphabet state is provided.
*/
- virtual void registerState(const AlphabetState& st);
+ virtual void registerState(AlphabetState* st) throw (Exception);
+
/**
* @brief Set a state in the Alphabet.
*
* @param pos The index of the state in the alphabet_ vector.
* @param st The new state to put in the Alphabet.
+ * @throw Exception If a wrong alphabet state is provided.
+ * @throw IndexOutOfBoundsException If an incorrect index is provided.
*/
- virtual void setState(size_t pos, const AlphabetState& st) throw (IndexOutOfBoundsException);
+ virtual void setState(size_t pos, AlphabetState* st) throw (Exception, IndexOutOfBoundsException);
+
/**
* @brief Resize the private alphabet_ vector.
*
* @param size The new size of the Alphabet.
*/
- void resize(unsigned int size) { alphabet_.resize(size); }
- /**
- * @brief Get a state at a position in the alphabet_ vector.
- *
- * This method must be overloaded in specialized classes to send back
- * a reference of the corect type.
- *
- * @param pos The index of the state in the alphabet_ vector.
- * @throw IndexOutOfBoundsException If pos is out of the vector.
- */
- virtual AlphabetState& getStateAt(size_t pos) throw (IndexOutOfBoundsException);
- /**
- * @brief Get a state at a position in the alphabet_ vector.
- *
- * This method must be overloaded in specialized classes to send back
- * a reference of the corect type.
- *
- * @param pos The index of the state in the alphabet_ vector.
- * @throw IndexOutOfBoundsException If pos is out of the vector.
- */
- virtual const AlphabetState& getStateAt(size_t pos) const throw (IndexOutOfBoundsException);
-
+ void resize(size_t size) { alphabet_.resize(size); }
+
/**
* @brief Re-update the maps using the alphabet_ vector content.
*/
void remap() {
- for (size_t i = 0 ; i < alphabet_.size() ; i++) {
- updateMaps_(i, * alphabet_[i]);
+ letters_.clear();
+ nums_.clear();
+ for (size_t i = 0; i < alphabet_.size(); ++i) {
+ updateMaps_(i, *alphabet_[i]);
}
}
unsigned int getStateCodingSize() const { return 1; }
+ bool equals(const Alphabet& alphabet) const {
+ return getAlphabetType() == alphabet.getAlphabetType();
+ }
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/Alphabet.h b/src/Bpp/Seq/Alphabet/Alphabet.h
index d61ae1d..ca4b927 100644
--- a/src/Bpp/Seq/Alphabet/Alphabet.h
+++ b/src/Bpp/Seq/Alphabet/Alphabet.h
@@ -47,6 +47,8 @@ knowledge of the CeCILL license and that you accept its terms.
#include "AlphabetExceptions.h"
#include "AlphabetState.h"
+#include <Bpp/Clonable.h>
+
/**
* @mainpage
*
@@ -112,18 +114,37 @@ namespace bpp
* the Alphabet classes, and the two methods intToChar() and charToInt().
* The Alphabet interface also provides other methods, like getting the full name
* of the states and so on.
+ *
+ * An Alphabet object stores its states as AlphabetState objects, in a
+ * potentially arbitrary but consistent order. All states are then indexed from
+ * 0 to 'numberOfChars' - 1. The number of states is equal to the number of string
+ * representations, but is usually higher than the number of int representations,
+ * as several characters can share the same int code (for instance X, N and ?
+ * in nucleotide alphabets).
*
* The alphabet objects may throw several exceptions derived of the AlphabetException
* class.
*
* @see AlphabetException, BadCharException, BadIntException
*/
-class Alphabet
+ class Alphabet:
+ public virtual Clonable
{
public:
Alphabet() {}
virtual ~Alphabet() {}
+
+ /**
+ * @name The Clonable interface
+ *
+ * @{
+ */
+#ifndef NO_VIRTUAL_COV
+ Alphabet* clone() const = 0;
+#endif
+ /** @} */
+
public:
/**
@@ -151,7 +172,29 @@ class Alphabet
virtual std::string getName(const std::string& state) const throw (BadCharException) = 0;
/**
- * @name = Tests
+ * @return The int code of a given state.
+ * @param stateIndex The index of the state to fetch.
+ */
+ virtual int getIntCodeAt(size_t stateIndex) const throw (IndexOutOfBoundsException) = 0;
+
+ /**
+ * @return The char code of a given state.
+ * @param stateIndex The index of the state to fetch.
+ */
+ virtual const std::string& getCharCodeAt(size_t stateIndex) const throw (IndexOutOfBoundsException) = 0;
+
+ /**
+ * @return The index of a state with the corresponding int code.
+ */
+ virtual size_t getStateIndex(int state) const throw (BadIntException) = 0;
+
+ /**
+ * @return The index of the state with corresponding char code.
+ */
+ virtual size_t getStateIndex(const std::string& state) const throw (BadCharException) = 0;
+
+ /**
+ * @name Tests
*
* @{
*/
@@ -182,8 +225,19 @@ class Alphabet
*/
/**
+ * @brief Get a state given its index.
+ *
+ * @param stateIndex The index of the state.
+ * @return The AlphabetState.
+ * @throw IndexOutOfBoundsException When the index is not valid.
+ */
+ virtual const AlphabetState& getStateAt(size_t stateIndex) const throw (IndexOutOfBoundsException) = 0;
+
+ /**
* @brief Get a state given its int description.
*
+ * Note: several states can share the same int value. This function will return one of them.
+ *
* @param state The int description.
* @return The AlphabetState.
* @throw BadIntException When state is not a valid integer.
@@ -231,7 +285,15 @@ class Alphabet
*
* @{
*/
-
+
+ /**
+ * @brief This is a convenient alias for getNumberOfChars(), returning a size_t
+ * instead of unsigned int.
+ *
+ * This function is typically used in loops over all states of an alphabet.
+ */
+ virtual size_t getNumberOfStates() const = 0;
+
/**
* @brief Get the number of supported characters in this alphabet,
* including generic characters (e.g. return 20 for DNA alphabet).
@@ -382,6 +444,13 @@ class Alphabet
* @author Sylvain Gaillard
*/
virtual unsigned int getStateCodingSize() const = 0;
+
+ /**
+ * @brief Comparison of alphabets
+ *
+ * @return true If the two instances are of the same class.
+ */
+ virtual bool equals(const Alphabet& alphabet) const = 0;
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp b/src/Bpp/Seq/Alphabet/AlphabetNumericState.h
similarity index 54%
copy from src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp
copy to src/Bpp/Seq/Alphabet/AlphabetNumericState.h
index eb1c39e..83b40ef 100644
--- a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp
+++ b/src/Bpp/Seq/Alphabet/AlphabetNumericState.h
@@ -1,14 +1,14 @@
-//
-// File: BppOSequenceWriterFormat.cpp
-// Created by: Julien Dutheil
-// Created on: Friday September 15th, 21:20
-//
+//
+// File: AlphabetNumericState.h
+// Author: Laurent Guéguen
+// Created: 03/2010
+//
/*
- Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (March, 2010)
This software is a computer program whose purpose is to provide classes
- for phylogenetic data analysis.
+ for sequences analysis.
This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
@@ -37,38 +37,62 @@
knowledge of the CeCILL license and that you accept its terms.
*/
-#include "BppOSequenceWriterFormat.h"
-#include "Fasta.h"
-#include "Mase.h"
-
-#include <Bpp/Text/KeyvalTools.h>
+#ifndef _ALPHABETNUMERICSTATE_H_
+#define _ALPHABETNUMERICSTATE_H_
+// From the STL
#include <string>
-#include <memory>
-
-using namespace bpp;
-using namespace std;
-
-OSequence* BppOSequenceWriterFormat::read(const std::string& description) throw (Exception)
-{
- unparsedArguments_.clear();
- string format = "";
- KeyvalTools::parseProcedure(description, format, unparsedArguments_);
- unsigned int ncol = ApplicationTools::getParameter<unsigned int>("length", unparsedArguments_, 100, "", true, false);
- auto_ptr<OSequence> oSeq;
- if (format == "Fasta")
- {
- oSeq.reset(new Fasta(ncol));
- }
- else if (format == "Mase")
- {
- oSeq.reset(new Mase(ncol));
- }
- else
- {
- throw Exception("Sequence format '" + format + "' unknown.");
- }
-
- return oSeq.release();
+
+// From bpp-core
+#include <Bpp/Clonable.h>
+
+// From bpp-seq
+
+#include "AlphabetState.h"
+
+namespace bpp {
+ /**
+ * @brief States that do have a double value
+ *
+ * @author Laurent Guéguen
+ */
+ class AlphabetNumericState: public AlphabetState {
+ private:
+ double value_;
+
+ public:
+ AlphabetNumericState(int num, double value, const std::string& letter, const std::string& name): AlphabetState(num, letter, name), value_(value) {}
+
+ // Class destructor
+ virtual ~AlphabetNumericState() {}
+
+ public:
+ /**
+ * @name The Clonable interface.
+ * @{
+ */
+#ifdef NO_VIRTUAL_COV
+ Clonable*
+#else
+ AlphabetNumericState*
+#endif
+ clone() const { return new AlphabetNumericState(* this); }
+ /** @} */
+ /**
+ * @brief Get the state value.
+ *
+ * @return The state value
+ */
+ double getValue() const { return value_; }
+
+ /**
+ * @brief Set the state value.
+ * @param value The given state value.
+ */
+ void setValue(double value) { value_ = value; }
+
+ };
}
+#endif // _ALPHABETNUMERICSTATE_H_
+
diff --git a/src/Bpp/Seq/Alphabet/AlphabetTools.cpp b/src/Bpp/Seq/Alphabet/AlphabetTools.cpp
index 90f42f5..645e0e3 100644
--- a/src/Bpp/Seq/Alphabet/AlphabetTools.cpp
+++ b/src/Bpp/Seq/Alphabet/AlphabetTools.cpp
@@ -91,7 +91,7 @@ bool AlphabetTools::checkAlphabetCodingSize(const Alphabet& alphabet) throw (Alp
if (alphabet.getNumberOfChars() == 0)
return true; // Will this really happen?
size_t size = alphabet.intToChar(0).size();
- for (unsigned int i = 1; i < alphabet.getNumberOfTypes(); i++)
+ for (int i = 1; i < static_cast<int>(alphabet.getNumberOfTypes()); ++i)
{
if (alphabet.intToChar(i).size() != size)
return false;
diff --git a/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp b/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp
index 0a51ddd..522b812 100644
--- a/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -48,15 +48,12 @@ using namespace bpp;
BinaryAlphabet::BinaryAlphabet()
{
- // Alphabet size definition
- resize(3);
-
// Alphabet content definition
- setState(0, AlphabetState(-1, "-", "Gap"));
+ registerState(new AlphabetState(-1, "-", "Gap"));
- for (unsigned int i = 0; i < 2; i++)
+ for (int i = 0; i < 2; i++)
{
- setState(i + 1, AlphabetState(i, TextTools::toString(i), ""));
+ registerState(new AlphabetState(i, TextTools::toString(i), ""));
}
}
diff --git a/src/Bpp/Seq/Alphabet/BinaryAlphabet.h b/src/Bpp/Seq/Alphabet/BinaryAlphabet.h
index dbb05a8..563af67 100644
--- a/src/Bpp/Seq/Alphabet/BinaryAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/BinaryAlphabet.h
@@ -4,7 +4,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -51,16 +51,24 @@ namespace bpp
class BinaryAlphabet :
public AbstractAlphabet
{
-protected:
- void registerState(const AlphabetState& st)
- {
- AbstractAlphabet::registerState(*(st.clone()));
- }
-
public:
// class constructor
BinaryAlphabet();
+ BinaryAlphabet(const BinaryAlphabet& bia) : AbstractAlphabet(bia) {}
+
+ BinaryAlphabet& operator=(const BinaryAlphabet& bia)
+ {
+ AbstractAlphabet::operator=(bia);
+
+ return *this;
+ }
+
+ BinaryAlphabet* clone() const
+ {
+ return new BinaryAlphabet(*this);
+ }
+
// class destructor
virtual ~BinaryAlphabet() {}
diff --git a/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp b/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp
index 024336f..7acc094 100644
--- a/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.cpp
@@ -53,13 +53,13 @@ CaseMaskedAlphabet::CaseMaskedAlphabet(const LetterAlphabet* nocaseAlphabet) :
nocaseAlphabet_(nocaseAlphabet)
{
vector<string> chars = nocaseAlphabet_->getSupportedChars();
- for (unsigned int i = 0; i < chars.size(); ++i) {
- AlphabetState state = nocaseAlphabet_->getState(chars[i]);
+ for (size_t i = 0; i < chars.size(); ++i) {
+ AlphabetState* state = nocaseAlphabet_->getState(chars[i]).clone();
registerState(state);
char c = *chars[i].c_str();
if (isalpha(c)) {
if (isupper(c)) {
- registerState(AlphabetState(state.getNum() + 100, TextTools::toLower(state.getLetter()), string("Masked ") + state.getName()));
+ registerState(new AlphabetState(state->getNum() + 100, TextTools::toLower(state->getLetter()), string("Masked ") + state->getName()));
}
}
}
diff --git a/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h b/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h
index 225d6ce..5c90a29 100644
--- a/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/CaseMaskedAlphabet.h
@@ -5,36 +5,36 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. CNRS, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _CASEMASKEDALPHABET_H_
@@ -56,25 +56,32 @@ namespace bpp
* by creating a lower case version of the state, also named "masked" state.
* Helper functions are provided to determine whether a given state is masked or unmasked.
*/
-class CaseMaskedAlphabet :
- public LetterAlphabet
-{
+ class CaseMaskedAlphabet :
+ public LetterAlphabet
+ {
public:
const LetterAlphabet* nocaseAlphabet_;
public:
CaseMaskedAlphabet(const LetterAlphabet* nocaseAlphabet);
+
CaseMaskedAlphabet(const CaseMaskedAlphabet& cma) :
LetterAlphabet(cma), nocaseAlphabet_(cma.nocaseAlphabet_) {}
+
CaseMaskedAlphabet& operator=(const CaseMaskedAlphabet& cma) {
LetterAlphabet::operator=(cma);
nocaseAlphabet_ = cma.nocaseAlphabet_;
return *this;
}
+ CaseMaskedAlphabet* clone() const
+ {
+ return new CaseMaskedAlphabet(*this);
+ }
+
public:
- unsigned int getSize() const { return nocaseAlphabet_->getSize(); }
- unsigned int getNumberOfTypes() const { return nocaseAlphabet_->getNumberOfTypes(); }
+ unsigned int getSize() const { return nocaseAlphabet_->getSize(); }
+ unsigned int getNumberOfTypes() const { return nocaseAlphabet_->getNumberOfTypes(); }
std::string getAlphabetType() const { return "Default alphabet"; }
int getUnknownCharacterCode() const { return nocaseAlphabet_->getUnknownCharacterCode(); }
bool isUnresolved(int state) const { return nocaseAlphabet_->isUnresolved(state); }
@@ -109,7 +116,7 @@ class CaseMaskedAlphabet :
*/
const std::string getMaskedEquivalentState(const std::string& state) const throw (BadCharException, BadIntException);
-};
+ };
} // end of namespace bpp
diff --git a/src/Bpp/Seq/Alphabet/CodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/CodonAlphabet.cpp
index 15522c2..9072c02 100644
--- a/src/Bpp/Seq/Alphabet/CodonAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/CodonAlphabet.cpp
@@ -1,11 +1,11 @@
//
-// File: CodonAlphabet.h
+// File: CodonAlphabet.cpp
// Created by: Julien Dutheil
// Created on: Sun Oct 12 17:41:56 2003
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -47,15 +47,8 @@ using namespace bpp;
using namespace std;
-const string CodonAlphabet::STOP = "Stop";
-
/******************************************************************************/
-unsigned int CodonAlphabet::numberOfStopCodons() const
-{
- return static_cast<unsigned int>(stopCodons_.size());
-}
-
int CodonAlphabet::getCodon(int pos1, int pos2, int pos3) const
throw (BadIntException)
{
@@ -125,3 +118,4 @@ throw (BadCharException)
return getNPosition(codon,2);
}
+/******************************************************************************/
diff --git a/src/Bpp/Seq/Alphabet/CodonAlphabet.h b/src/Bpp/Seq/Alphabet/CodonAlphabet.h
index a0f37b8..b08f0e3 100644
--- a/src/Bpp/Seq/Alphabet/CodonAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/CodonAlphabet.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -51,9 +51,10 @@ namespace bpp
{
/**
- * @brief The abstract base class for codon alphabets.
- * @author Laurent Guéguen
+ * @brief Codon alphabet class.
+ * @author Laurent Guéguen, Julien Dutheil
*
+ * A codon alphabet object is a particular case of WordAlphabet with three letters.
* Since codons are made of 3 nucleic bases (RNA or DNA), this class
* has a NucleicAlphabet field used to check char description. This
* nucleic alphabet is passed to the constructor. This class also adds
@@ -62,20 +63,6 @@ namespace bpp
class CodonAlphabet:
public WordAlphabet
{
-protected:
- /**
- *@brief the vector of the numbers of the stop codon states.
- */
- std::vector<int> stopCodons_;
-
- //Constant used for init codon:
- int initCodon_;
-
-
-public:
- //Constant used for stop codons:
- static const std::string STOP;
-
public: // Constructor and destructor.
/**
@@ -83,11 +70,29 @@ public: // Constructor and destructor.
*
* @param alpha The nucleic alphabet to be used.
*/
- CodonAlphabet(const NucleicAlphabet* alpha) : WordAlphabet(alpha, 3), stopCodons_(), initCodon_() {}
+ CodonAlphabet(const NucleicAlphabet* alpha) :
+ WordAlphabet(alpha, 3) {}
+
+ CodonAlphabet(const CodonAlphabet& bia) : WordAlphabet(bia) {}
+
+ CodonAlphabet& operator=(const CodonAlphabet& bia)
+ {
+ WordAlphabet::operator=(bia);
+ return *this;
+ }
+ CodonAlphabet* clone() const
+ {
+ return new CodonAlphabet(*this);
+ }
+
virtual ~CodonAlphabet() {}
- virtual std::string getAlphabetType() const = 0;
+ std::string getAlphabetType() const
+ {
+ return "Codon alphabet("+ vAbsAlph_[0]->getAlphabetType() + ")";
+ }
+
public:
@@ -98,18 +103,6 @@ public:
*/
/**
- * @brief Returns the number of stop codons
- */
-
- unsigned int numberOfStopCodons() const;
-
- /**
- * @brief Returns the vector of the numbers of the stop codon states.
- */
-
- const std::vector<int>& stopCodons() const { return stopCodons_;}
-
- /**
* @brief Get the int code for a codon given the int code of the three underlying positions.
*
* The int code of each position must match the nucleic alphabet specified for this alphabet.
@@ -181,50 +174,6 @@ public:
virtual std::string getThirdPosition(const std::string& codon) const throw (BadCharException);
/**
- * @brief Tell whether a particular codon is a stop codon.
- *
- * @param codon The int description of the codon to test.
- * @return True if the codon is a stop codon.
- */
- bool isStop(int codon) const
- {
- return (getName(intToChar(codon)) == STOP);
- }
-
- /**
- * @brief Tell whether a particular codon is a stop codon.
- *
- * @param codon The char description of the codon to test.
- * @return True if the codon is a stop codon.
- */
- bool isStop(const std::string& codon) const
- {
- return (getName(codon) == STOP);
- }
-
- /**
- * @brief Tell whether a particular codon is the init codon.
- *
- * @param codon The int description of the codon to test.
- * @return True if the codon is the init codon.
- */
- bool isInit(int codon) const
- {
- return (codon == initCodon_);
- }
-
- /**
- * @brief Tell whether a particular codon is the init codon.
- *
- * @param codon The char description of the codon to test.
- * @return True if the codon is a init codon.
- */
- bool isInit(const std::string& codon) const
- {
- return (charToInt(codon) == initCodon_);
- }
-
- /**
* @return The nucleic alphabet associated to this codon alphabet.
*/
virtual const NucleicAlphabet* const getNucleicAlphabet() const
diff --git a/src/Bpp/Seq/Alphabet/DNA.cpp b/src/Bpp/Seq/Alphabet/DNA.cpp
index 3eac3e9..e6cdf0d 100644
--- a/src/Bpp/Seq/Alphabet/DNA.cpp
+++ b/src/Bpp/Seq/Alphabet/DNA.cpp
@@ -56,35 +56,32 @@ using namespace std;
DNA::DNA(bool exclamationMarkCountsAsGap)
{
- // Alphabet size definition
- resize(21);
-
// Alphabet content definition
// all unresolved bases use n°14
- setState( 0, NucleicAlphabetState(-1, "-", 0, "Gap"));
- setState( 1, NucleicAlphabetState( 0, "A", 1, "Adenine"));
- setState( 2, NucleicAlphabetState( 1, "C", 2, "Cytosine"));
- setState( 3, NucleicAlphabetState( 2, "G", 4, "Guanine"));
- setState( 4, NucleicAlphabetState( 3, "T", 8, "Thymine"));
- setState( 5, NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
- setState( 6, NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
- setState( 7, NucleicAlphabetState( 6, "W", 9, "Adenine or Thymine"));
- setState( 8, NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
- setState( 9, NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Thymine)"));
- setState(10, NucleicAlphabetState( 9, "K", 12, "Guanine or Thymine"));
- setState(11, NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
- setState(12, NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Thymine"));
- setState(13, NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Thymine"));
- setState(14, NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Thymine"));
- setState(15, NucleicAlphabetState(14, "N", 15, "Unresolved base"));
- setState(16, NucleicAlphabetState(14, "X", 15, "Unresolved base"));
- setState(17, NucleicAlphabetState(14, "O", 15, "Unresolved base"));
- setState(18, NucleicAlphabetState(14, "0", 15, "Unresolved base"));
- setState(19, NucleicAlphabetState(14, "?", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
+ registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
+ registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
+ registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
+ registerState(new NucleicAlphabetState( 3, "T", 8, "Thymine"));
+ registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
+ registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
+ registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Thymine"));
+ registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
+ registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Thymine)"));
+ registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Thymine"));
+ registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
+ registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Thymine"));
+ registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Thymine"));
+ registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Thymine"));
+ registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
if (exclamationMarkCountsAsGap)
- setState(20, NucleicAlphabetState(-1, "!", 0, "Unresolved base"));
+ registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
else
- setState(20, NucleicAlphabetState(14, "!", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Alphabet/DNA.h b/src/Bpp/Seq/Alphabet/DNA.h
index b11c9d9..668cfa4 100644
--- a/src/Bpp/Seq/Alphabet/DNA.h
+++ b/src/Bpp/Seq/Alphabet/DNA.h
@@ -6,36 +6,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _DNA_H_
@@ -57,24 +57,38 @@ namespace bpp
* Gaps are coded by '-', unresolved characters are coded by 'X, N, O, 0 or ?'.
* Extensive support for generic characters (e.g. 'P', 'Y', etc.) is provided.
*/
-class DNA:
- public NucleicAlphabet
-{
- public:
+ class DNA:
+ public NucleicAlphabet
+ {
+ public:
/**
* @param exclamationMarkCountsAsGap If yes, '!' characters are replaced by gaps.
* Otherwise, they are counted as unknown characters.
*/
- DNA(bool exclamationMarkCountsAsGap = false);
- virtual ~DNA() {}
+ DNA(bool exclamationMarkCountsAsGap = false);
+
+ DNA(const DNA& bia) : NucleicAlphabet(bia) {}
+
+ DNA& operator=(const DNA& bia)
+ {
+ NucleicAlphabet::operator=(bia);
+ return *this;
+ }
+
+ DNA* clone() const
+ {
+ return new DNA(*this);
+ }
+
+ virtual ~DNA() {}
- public:
+ public:
std::vector<int> getAlias(int state) const throw (BadIntException);
std::vector<std::string> getAlias(const std::string& state) const throw (BadCharException);
int getGeneric(const std::vector<int>& states) const throw (BadIntException);
std::string getGeneric(const std::vector<std::string>& states) const throw (BadCharException);
std::string getAlphabetType() const { return "DNA alphabet"; }
-};
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp b/src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp
index 3b48611..d3d12a0 100644
--- a/src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/DefaultAlphabet.cpp
@@ -6,7 +6,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -48,15 +48,12 @@ using namespace bpp;
DefaultAlphabet::DefaultAlphabet():
chars_("ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890.?")
{
- // Alphabet size definition
- resize(static_cast<unsigned int>(chars_.size()) + 1);
-
// Alphabet content definition
- setState(0, AlphabetState(-1, "-", "Gap"));
+ registerState(new AlphabetState(-1, "-", "Gap"));
- for (size_t i = 0; i < chars_.size(); i++)
+ for (size_t i = 0; i < chars_.size(); i++)
{
- setState(i + 1, AlphabetState(static_cast<int>(i), TextTools::toString(chars_[i]), ""));
- }
+ registerState(new AlphabetState(static_cast<int>(i), TextTools::toString(chars_[i]), ""));
+ }
}
diff --git a/src/Bpp/Seq/Alphabet/DefaultAlphabet.h b/src/Bpp/Seq/Alphabet/DefaultAlphabet.h
index f88edf9..b84419c 100644
--- a/src/Bpp/Seq/Alphabet/DefaultAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/DefaultAlphabet.h
@@ -4,36 +4,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _DEFAULTALPHABET_H_
@@ -51,31 +51,47 @@ namespace bpp
* This alphabet should match virtually any type of sequences.
* This should be used by who does not care of the sequence type.
*/
-class DefaultAlphabet:
- public LetterAlphabet
-{
- protected:
- void registerState(const AlphabetState& st) {
- AbstractAlphabet::registerState(* (st.clone()));
+ class DefaultAlphabet:
+ public LetterAlphabet
+ {
+ protected:
+ void registerState(AlphabetState* st) throw (Exception) {
+ AbstractAlphabet::registerState(st);
}
- const std::string chars_;
+ std::string chars_;
- public:
- // class constructor
- DefaultAlphabet();
+ public:
+ // class constructor
+ DefaultAlphabet();
+
+ DefaultAlphabet(const DefaultAlphabet& bia) : LetterAlphabet(bia), chars_(bia.chars_) {}
+
+ DefaultAlphabet& operator=(const DefaultAlphabet& bia)
+ {
+ LetterAlphabet::operator=(bia);
+ chars_=bia.chars_;
+ return *this;
+ }
+
+ DefaultAlphabet* clone() const
+ {
+ return new DefaultAlphabet(*this);
+ }
+
+
- // class destructor
- virtual ~DefaultAlphabet() {}
+ // class destructor
+ virtual ~DefaultAlphabet() {}
- public:
- unsigned int getSize() const { return 26; }
- unsigned int getNumberOfTypes() const { return 27; }
+ public:
+ unsigned int getSize() const { return 26; }
+ unsigned int getNumberOfTypes() const { return 27; }
std::string getAlphabetType() const { return "Default alphabet"; }
int getUnknownCharacterCode() const { return 37; }
bool isUnresolved(int state) const { return state == 37; }
bool isUnresolved(const std::string& state) const { return false; }
- };
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp
deleted file mode 100644
index bd5953b..0000000
--- a/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// File: EchinodermMitochondrialCodonAlphabet.h
-// Authors: Eric Bazin
-// Sylvain Gaillard
-// Created on: 14 11:31:27 CET 2005
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#include "EchinodermMitochondrialCodonAlphabet.h"
-
-using namespace bpp;
-using namespace std;
-
-EchinodermMitochondrialCodonAlphabet::EchinodermMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) :
- CodonAlphabet(alpha)
-{
- string A = alpha->intToChar(0);
- string G = alpha->intToChar(2);
- string T = alpha->intToChar(3);
-
- vector<string> vstop;
-
- vstop.push_back(T + A + A);
- vstop.push_back(T + A + G);
-
- int istop;
- unsigned int j;
- for (unsigned int i = 0 ; i < vstop.size() ; i++) {
- istop = charToInt(vstop[i]);
- stopCodons_.push_back(istop);
-
- j = 0;
- while (j < getNumberOfChars()) {
- if (getStateAt(j).getNum() == istop) {
- getStateAt(j).setName(STOP);
- break;
- }
- j++;
- }
- }
- initCodon_ = charToInt(A + T + G);
-}
-
diff --git a/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h b/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h
deleted file mode 100644
index 7bfc395..0000000
--- a/src/Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h
+++ /dev/null
@@ -1,72 +0,0 @@
-//
-// File: EchinodermMitochondrialCodonAlphabet.h
-// Created by: Eric Bazin
-// Created on: 14 11:31:27 CET 2005
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#ifndef _ECHINODERMMITOCHONDRIALCODONALPHABET_H_
-#define _ECHINODERMMITOCHONDRIALCODONALPHABET_H_
-
-
-#include "CodonAlphabet.h"
-
-namespace bpp
-{
-
-/**
- * @brief This class implements the Echinoderm and Faltworms
- * Mitochondrial codon alphabet as describe on the NCBI website:
- * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG9
- * @author Laurent Guéguen
- */
-
-class EchinodermMitochondrialCodonAlphabet : public CodonAlphabet
-{
-public:
- EchinodermMitochondrialCodonAlphabet(const NucleicAlphabet * alpha);
- virtual ~EchinodermMitochondrialCodonAlphabet() {};
-
-public:
- std::string getAlphabetType() const
- {
- return "Codon alphabet: EchinodermMitochondrialCodonAlphabet(" + vAbsAlph_[0]->getAlphabetType() + ")";
- }
-
-};
-
-} //end of namespace bpp.
-
-#endif //_ECHINODERMMITOCHONDRIALCODONALPHABET_H_
diff --git a/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp b/src/Bpp/Seq/Alphabet/IntegerAlphabet.cpp
similarity index 81%
copy from src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp
copy to src/Bpp/Seq/Alphabet/IntegerAlphabet.cpp
index 0a51ddd..ba07f72 100644
--- a/src/Bpp/Seq/Alphabet/BinaryAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/IntegerAlphabet.cpp
@@ -1,11 +1,11 @@
//
// File: BinaryAlphabet.cpp
// Authors: Laurent Gueguen
-// Created on: 2009
+// Created on: vendredi 20 septembre 2013, à 23h 10
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -38,7 +38,7 @@
*/
-#include "BinaryAlphabet.h"
+#include "IntegerAlphabet.h"
#include "AlphabetState.h"
// From Utils:
@@ -46,17 +46,17 @@
using namespace bpp;
-BinaryAlphabet::BinaryAlphabet()
+IntegerAlphabet::IntegerAlphabet(unsigned int max) : MAX_(max)
{
// Alphabet size definition
- resize(3);
+ resize(MAX_);
// Alphabet content definition
- setState(0, AlphabetState(-1, "-", "Gap"));
+ registerState(new AlphabetState(-1, "-", "Gap"));
- for (unsigned int i = 0; i < 2; i++)
+ for (int i = 0; i < static_cast<int>(MAX_); i++)
{
- setState(i + 1, AlphabetState(i, TextTools::toString(i), ""));
+ registerState(new AlphabetState(i, TextTools::toString(i), ""));
}
}
diff --git a/src/Bpp/Seq/Alphabet/BinaryAlphabet.h b/src/Bpp/Seq/Alphabet/IntegerAlphabet.h
similarity index 64%
copy from src/Bpp/Seq/Alphabet/BinaryAlphabet.h
copy to src/Bpp/Seq/Alphabet/IntegerAlphabet.h
index dbb05a8..a586b08 100644
--- a/src/Bpp/Seq/Alphabet/BinaryAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/IntegerAlphabet.h
@@ -1,10 +1,11 @@
//
// File: BinaryAlphabet.h
// Author: L Gueguen
+// Created on: vendredi 20 septembre 2013, à 23h 01
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -36,43 +37,58 @@
knowledge of the CeCILL license and that you accept its terms.
*/
-#ifndef _BINARYALPHABET_H_
-#define _BINARYALPHABET_H_
+#ifndef _INTEGERALPHABET_H_
+#define _INTEGERALPHABET_H_
#include "AbstractAlphabet.h"
namespace bpp
{
/**
- * @brief The BinaryAlphabet class, letters are 0 and 1.
+ * @brief The Integer Alphabet class, letters are from 0 to a given number, MAX.
* @author Laurent Gueguen
*
*/
-class BinaryAlphabet :
+class IntegerAlphabet :
public AbstractAlphabet
{
-protected:
- void registerState(const AlphabetState& st)
- {
- AbstractAlphabet::registerState(*(st.clone()));
- }
+private:
+ unsigned int MAX_;
public:
// class constructor
- BinaryAlphabet();
+ IntegerAlphabet(unsigned int max);
+
+ IntegerAlphabet(const IntegerAlphabet& bia) : AbstractAlphabet(bia), MAX_(bia.MAX_) {}
+
+ IntegerAlphabet& operator=(const IntegerAlphabet& bia)
+ {
+ AbstractAlphabet::operator=(bia);
+ MAX_=bia.MAX_;
+
+ return *this;
+ }
+ IntegerAlphabet* clone() const
+ {
+ return new IntegerAlphabet(*this);
+ }
// class destructor
- virtual ~BinaryAlphabet() {}
+ virtual ~IntegerAlphabet() {}
public:
- unsigned int getSize() const { return 2; }
- unsigned int getNumberOfTypes() const { return 2; }
- std::string getAlphabetType() const { return "Binary alphabet"; }
- int getUnknownCharacterCode() const { return 3; }
- bool isUnresolved(int state) const { return state == 3; }
+ unsigned int getSize() const { return MAX_ + 1; }
+
+ unsigned int getNumberOfTypes() const { return MAX_ + 1; }
+
+ std::string getAlphabetType() const { return "Integer alphabet"; }
+
+ int getUnknownCharacterCode() const { return static_cast<int>(MAX_); }
+
+ bool isUnresolved(int state) const { return state == static_cast<int>(MAX_); }
+
bool isUnresolved(const std::string& state) const { return false; }
};
} // end of namespace bpp.
-#endif // _BINARYALPHABET_H_
-
+#endif // _INTEGERALPHABET_H_
diff --git a/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp
deleted file mode 100644
index 548c5e7..0000000
--- a/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//
-// File: InvertebrateMitochondrialCodonAlphabet.h
-// Authors: Eric Bazin
-// Sylvain Gaillard
-// Created on: thu mar 1 14:25:09 CET 2005
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#include "InvertebrateMitochondrialCodonAlphabet.h"
-
-using namespace bpp;
-using namespace std;
-
-InvertebrateMitochondrialCodonAlphabet::InvertebrateMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) : CodonAlphabet(alpha)
-{
- string A = alpha->intToChar(0);
- string G = alpha->intToChar(2);
- string T = alpha->intToChar(3);
-
- vector<string> vstop;
-
- vstop.push_back(T + A + A);
- vstop.push_back(T + A + G);
-
- int istop;
- unsigned int j;
- for (unsigned int i = 0 ; i < vstop.size() ; i++) {
- istop = charToInt(vstop[i]);
- stopCodons_.push_back(istop);
-
- j = 0;
- while (j < getNumberOfChars()) {
- if (getStateAt(j).getNum() == istop) {
- getStateAt(j).setName(STOP);
- break;
- }
- j++;
- }
- }
- initCodon_ = charToInt(A + T + G);
-}
-
diff --git a/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h b/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h
deleted file mode 100644
index a24fda3..0000000
--- a/src/Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// File: InvertebrateMitochondrialCodonAlphabet.h
-// Created by: Eric Bazin
-// Created on: thu mar 1 14:25:09 CET 2005
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#ifndef _INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-#define _INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-
-
-#include "CodonAlphabet.h"
-
-namespace bpp
-{
-
-/**
- * @brief This class implements the Invertebrate
- * @author Laurent Guéguen
- *
- * Mitochondrial codon alphabet as describe on the NCBI website:
- * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG5
- */
-class InvertebrateMitochondrialCodonAlphabet:
- public CodonAlphabet
-{
-public:
- InvertebrateMitochondrialCodonAlphabet(const NucleicAlphabet * alpha);
- virtual ~InvertebrateMitochondrialCodonAlphabet() {};
-
-public:
-
- std::string getAlphabetType() const
- {
- return "Codon alphabet: InvertebrateMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")";
- }
-
-};
-
-} //end of namespace bpp.
-
-#endif //_INVERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-
diff --git a/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.h b/src/Bpp/Seq/Alphabet/LetterAlphabet.cpp
similarity index 65%
rename from src/Bpp/Seq/Alphabet/StandardCodonAlphabet.h
rename to src/Bpp/Seq/Alphabet/LetterAlphabet.cpp
index 7a011d2..9fd385f 100644
--- a/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/LetterAlphabet.cpp
@@ -1,11 +1,11 @@
-//
-// File: StandardCodonAlphabet.h
-// Created by: Julien Dutheil
-// Created on: Sun Oct 12 17:51:36 2003
-//
+//
+// File: LetterAlphabet.cpp
+// Author: Sylvain Gaillard
+// Created: 11/09/2009 14:31:05
+//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (September 11, 2009)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -37,35 +37,9 @@ The fact that you are presently reading this means that you have had
knowledge of the CeCILL license and that you accept its terms.
*/
-#ifndef _STANDARDCODONALPHABET_H_
-#define _STANDARDCODONALPHABET_H_
+#include "LetterAlphabet.h"
-#include "CodonAlphabet.h"
+using namespace bpp;
-namespace bpp
-{
-
-/**
- * @brief This class implements the standard codon alphabet as describe on the NCBI
- * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG1
- */
-class StandardCodonAlphabet : public CodonAlphabet
-{
-public:
- StandardCodonAlphabet(const NucleicAlphabet * alpha);
-
- virtual ~StandardCodonAlphabet() {}
-
-public:
-
- std::string getAlphabetType() const
- {
- return "Codon alphabet: Standard("+ vAbsAlph_[0]->getAlphabetType() + ")";
- }
-
-};
-
-} //end of namespace bpp.
-
-#endif //_STANDARDCODONALPHABET_H_
+const int LetterAlphabet::LETTER_UNDEF_VALUE = -99;
diff --git a/src/Bpp/Seq/Alphabet/LetterAlphabet.h b/src/Bpp/Seq/Alphabet/LetterAlphabet.h
index 4bbd359..e323abc 100644
--- a/src/Bpp/Seq/Alphabet/LetterAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/LetterAlphabet.h
@@ -5,36 +5,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (September 11, 2009)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (September 11, 2009)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _LETTERALPHABET_
@@ -57,48 +57,65 @@ namespace bpp {
class LetterAlphabet:
public AbstractAlphabet
{
- private:
- static const int LETTER_UNDEF_VALUE = -99;
- std::vector<int> letters_;
- bool caseSensitive_;
-
- public:
- LetterAlphabet(bool caseSensitive = false): letters_(256, LETTER_UNDEF_VALUE), caseSensitive_(caseSensitive) {}
- virtual ~LetterAlphabet() {}
-
- public:
- bool isCharInAlphabet(char state) const {
- return letters_[static_cast<unsigned int>(state)] != LETTER_UNDEF_VALUE;
+ private:
+ static const int LETTER_UNDEF_VALUE;
+ std::vector<int> letters_;
+ bool caseSensitive_;
+
+ public:
+ LetterAlphabet(bool caseSensitive = false): letters_(256, LETTER_UNDEF_VALUE), caseSensitive_(caseSensitive) {}
+
+ LetterAlphabet(const LetterAlphabet& bia) : AbstractAlphabet(bia), letters_(bia.letters_), caseSensitive_(bia.caseSensitive_) {}
+
+ LetterAlphabet& operator=(const LetterAlphabet& bia)
+ {
+ AbstractAlphabet::operator=(bia);
+ letters_=bia.letters_;
+ caseSensitive_=bia.caseSensitive_;
+
+ return *this;
+
+ }
+
+#ifndef NO_VIRTUAL_COV
+ virtual LetterAlphabet* clone() const = 0;
+#endif
+
+ virtual ~LetterAlphabet() {}
+
+ public:
+ bool isCharInAlphabet(char state) const {
+ return letters_[static_cast<unsigned int>(state)] != LETTER_UNDEF_VALUE;
+ }
+ bool isCharInAlphabet(const std::string& state) const {
+ return isCharInAlphabet(state[0]);
+ }
+ int charToInt(const std::string &state) const throw (BadCharException) {
+ if (!isCharInAlphabet(state))
+ throw BadCharException(state, "LetterAlphabet::charToInt: Unknown state", this);
+ return letters_[static_cast<unsigned int>(state[0])];
+ }
+
+ protected:
+ void registerState(AlphabetState* st) throw (Exception) {
+ AbstractAlphabet::registerState(st);
+ if (caseSensitive_) {
+ letters_[static_cast<unsigned int>(st->getLetter()[0])] = st->getNum();
+ } else {
+ letters_[static_cast<unsigned int>(tolower(st->getLetter()[0]))] = st->getNum();
+ letters_[static_cast<unsigned int>(toupper(st->getLetter()[0]))] = st->getNum();
}
- bool isCharInAlphabet(const std::string& state) const {
- return isCharInAlphabet(state[0]);
- }
- int charToInt(const std::string &state) const throw (BadCharException) {
- if (!isCharInAlphabet(state))
- throw BadCharException(state, "LetterAlphabet::charToInt: Unknown state", this);
- return letters_[static_cast<unsigned int>(state[0])];
- }
-
- protected:
- void registerState(const AlphabetState& st) {
- AbstractAlphabet::registerState(st);
- if (caseSensitive_) {
- letters_[static_cast<unsigned int>(st.getLetter()[0])] = st.getNum();
- } else {
- letters_[static_cast<unsigned int>(tolower(st.getLetter()[0]))] = st.getNum();
- letters_[static_cast<unsigned int>(toupper(st.getLetter()[0]))] = st.getNum();
- }
- }
-
- void setState(size_t pos, const AlphabetState& st) throw (IndexOutOfBoundsException) {
- AbstractAlphabet::setState(pos, st);
- if (caseSensitive_) {
- letters_[static_cast<unsigned int>(st.getLetter()[0])] = st.getNum();
- } else {
- letters_[static_cast<unsigned int>(tolower(st.getLetter()[0]))] = st.getNum();
- letters_[static_cast<unsigned int>(toupper(st.getLetter()[0]))] = st.getNum();
- }
+ }
+
+ void setState(size_t pos, AlphabetState* st) throw (Exception, IndexOutOfBoundsException) {
+ AbstractAlphabet::setState(pos, st);
+ if (caseSensitive_) {
+ letters_[static_cast<unsigned int>(st->getLetter()[0])] = st->getNum();
+ } else {
+ letters_[static_cast<unsigned int>(tolower(st->getLetter()[0]))] = st->getNum();
+ letters_[static_cast<unsigned int>(toupper(st->getLetter()[0]))] = st->getNum();
}
+ }
};
}
diff --git a/src/Bpp/Seq/Alphabet/NucleicAlphabet.h b/src/Bpp/Seq/Alphabet/NucleicAlphabet.h
index 8eca540..2001d83 100644
--- a/src/Bpp/Seq/Alphabet/NucleicAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/NucleicAlphabet.h
@@ -7,36 +7,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _NUCLEICALPHABET_H_
@@ -47,6 +47,7 @@ knowledge of the CeCILL license and that you accept its terms.
#include <map>
#include <iostream>
+#include <typeinfo>
namespace bpp
{
@@ -57,46 +58,55 @@ namespace bpp
* This class only implements a few methods, it is mainly designed for methods/classes
* that will require to work with both RNA and DNA.
*/
-class NucleicAlphabet :
- public LetterAlphabet
-{
+ class NucleicAlphabet :
+ public LetterAlphabet
+ {
private:
- std::map<int, unsigned int> binCodes_;
- void updateMaps_(int pos, const NucleicAlphabetState& st) {
+ std::map<int, size_t> binCodes_;
+ void updateBinMaps_(size_t pos, const NucleicAlphabetState& st) {
if (binCodes_.find(st.getBinaryCode()) == binCodes_.end())
binCodes_[st.getBinaryCode()] = pos;
}
- public:
- NucleicAlphabet(): binCodes_() {}
+ public:
+ NucleicAlphabet(): LetterAlphabet(), binCodes_() {}
+
+ NucleicAlphabet(const NucleicAlphabet& bia) : LetterAlphabet(bia), binCodes_(bia.binCodes_) {}
+
+ NucleicAlphabet& operator=(const NucleicAlphabet& bia)
+ {
+ LetterAlphabet::operator=(bia);
+ binCodes_ = bia.binCodes_;
+ return *this;
+ }
+
+#ifndef NO_VIRTUAL_COV
+ virtual NucleicAlphabet* clone() const = 0;
+#endif
- virtual ~NucleicAlphabet() {}
+ virtual ~NucleicAlphabet() {}
protected:
/**
* @name Overloaded methods from AbstractAlphabet
* @{
*/
- void registerState(const NucleicAlphabetState& st) {
- LetterAlphabet::registerState(st);
- updateMaps_(getNumberOfChars(), st);
+ void registerState(AlphabetState* st) throw (Exception) {
+ NucleicAlphabetState* nst = dynamic_cast<NucleicAlphabetState*>(st);
+ if (!nst)
+ throw Exception("NucleicAlphabet::registerState. Incorrect alphabet type.");
+ LetterAlphabet::registerState(nst);
+ updateBinMaps_(getNumberOfChars(), *nst);
}
- void setState(unsigned int pos, const NucleicAlphabetState& st) {
- LetterAlphabet::setState(pos, st);
- updateMaps_(pos, st);
+
+ void setState(size_t pos, AlphabetState* st) throw (Exception, IndexOutOfBoundsException) {
+ NucleicAlphabetState* nst = dynamic_cast<NucleicAlphabetState*>(st);
+ if (!nst)
+ throw Exception("NucleicAlphabet::setState. Incorrect alphabet type.");
+ LetterAlphabet::setState(pos, nst);
+ updateBinMaps_(pos, *nst);
}
- const NucleicAlphabetState& getStateAt(unsigned int pos) const
- throw (IndexOutOfBoundsException) {
- return dynamic_cast<const NucleicAlphabetState&>(
- AbstractAlphabet::getStateAt(pos)
- );
- }
- NucleicAlphabetState& getStateAt(unsigned int pos)
- throw (IndexOutOfBoundsException) {
- return dynamic_cast<NucleicAlphabetState&>(
- AbstractAlphabet::getStateAt(pos)
- );
- }
+
/** @} */
public:
@@ -104,18 +114,30 @@ class NucleicAlphabet :
* @name Overloaded methods from AbstractAlphabet
* @{
*/
+ const NucleicAlphabetState& getStateAt(size_t stateIndex) const
+ throw (IndexOutOfBoundsException) {
+ return dynamic_cast<const NucleicAlphabetState&>(
+ AbstractAlphabet::getStateAt(stateIndex)
+ );
+ }
+ NucleicAlphabetState& getStateAt(size_t stateIndex)
+ throw (IndexOutOfBoundsException) {
+ return dynamic_cast<NucleicAlphabetState&>(
+ AbstractAlphabet::getStateAt(stateIndex)
+ );
+ }
const NucleicAlphabetState& getState(const std::string& letter) const
throw (BadCharException) {
- return dynamic_cast<const NucleicAlphabetState&>(
- AbstractAlphabet::getState(letter)
- );
- }
+ return dynamic_cast<const NucleicAlphabetState&>(
+ AbstractAlphabet::getState(letter)
+ );
+ }
const NucleicAlphabetState& getState(int num) const
throw (BadIntException) {
- return dynamic_cast<const NucleicAlphabetState&>(
- AbstractAlphabet::getState(num)
- );
- }
+ return dynamic_cast<const NucleicAlphabetState&>(
+ AbstractAlphabet::getState(num)
+ );
+ }
/** @} */
/**
@@ -133,7 +155,7 @@ class NucleicAlphabet :
*/
const NucleicAlphabetState& getStateByBinCode(int code) const
throw (BadIntException) {
- std::map<int, unsigned int>::const_iterator it = binCodes_.find(code);
+ std::map<int, size_t>::const_iterator it = binCodes_.find(code);
if (it == binCodes_.end())
throw BadIntException(code, "NucleicAlphabet::getState(unsigned char): Binary code not in alphabet", this);
return getStateAt(it->second);
@@ -239,19 +261,19 @@ class NucleicAlphabet :
/** @} */
- public:
- // return 4 : A, C, G, T (or U)
- unsigned int getSize() const { return 4; }
+ public:
+ // return 4 : A, C, G, T (or U)
+ unsigned int getSize() const { return 4; }
- // return 15 : gap isn't included, generic unresolved bases (N, X, ?, O, 0) count for one
- unsigned int getNumberOfTypes() const { return 15; }
+ // return 15 : gap isn't included, generic unresolved bases (N, X, ?, O, 0) count for one
+ unsigned int getNumberOfTypes() const { return 15; }
int getUnknownCharacterCode() const { return 14; }
bool isUnresolved(int state) const { return state > 3; }
bool isUnresolved(const std::string& state) const { return charToInt(state) > 3; }
-};
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/NumericAlphabet.cpp b/src/Bpp/Seq/Alphabet/NumericAlphabet.cpp
new file mode 100644
index 0000000..b93192b
--- /dev/null
+++ b/src/Bpp/Seq/Alphabet/NumericAlphabet.cpp
@@ -0,0 +1,189 @@
+//
+// File: NumericAlphabet.cpp
+// Created by: Laurent Gueguen
+// Created on: March 2010
+//
+
+/*
+  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide
+ classes for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided
+ only with a limited warranty and the software's author, the holder of
+ the economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards
+ their requirements in conditions enabling the security of their
+ systems and/or data to be ensured and, more generally, to use and
+ operate it in the same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "NumericAlphabet.h" // class's header file
+
+#include <Bpp/Text/TextTools.h>
+
+#include <iostream>
+#include <typeinfo>
+
+using namespace std;
+using namespace bpp;
+
+/****************************************************************************************/
+
+// Builds the alphabet from a uniform discrete distribution. A clone of pd is
+// stored, then one AlphabetNumericState is registered per category, numbered
+// from 0 in category order; the textual form of the category value is passed
+// for both string arguments of the state.
+NumericAlphabet::NumericAlphabet(const UniformDiscreteDistribution& pd) :
+  AbstractAlphabet(),
+  pdd_(pd.clone()),
+  values_()
+{
+  const size_t nbCategories = pdd_->getNumberOfCategories();
+  const Vdouble categories = pdd_->getCategories();
+
+  for (size_t cat = 0; cat < nbCategories; ++cat)
+  {
+    const double value = categories[cat];
+    const string label = TextTools::toString(value);
+    registerState(new AlphabetNumericState(static_cast<int>(cat), value, label, label));
+  }
+}
+
+// Copy constructor: copies the AbstractAlphabet part and the value map, and
+// clones the source's distribution instead of sharing the pointer.
+NumericAlphabet::NumericAlphabet(const NumericAlphabet& na) :
+  AbstractAlphabet(na), pdd_(na.pdd_->clone()), values_(na.values_)
+{
+}
+
+// Copy assignment. Copies the AbstractAlphabet part, replaces the owned
+// distribution by a clone of na's one (releasing the previous one), and copies
+// the value map so that lookups keep working on the assigned object.
+NumericAlphabet& NumericAlphabet::operator=(const NumericAlphabet& na)
+{
+  // Self-assignment: nothing to do, and deleting pdd_ below would be fatal.
+  if (this == &na)
+    return *this;
+
+  AbstractAlphabet::operator=(na);
+
+  // Clone before deleting the old distribution so that pdd_ never dangles
+  // if clone() throws.
+  const UniformDiscreteDistribution* newDist = na.pdd_->clone();
+  delete pdd_;
+  pdd_ = newDist;
+
+  values_ = na.values_;
+  return *this;
+}
+
+/****************************************************************************************/
+
+// Replaces the state at position stateIndex and records its value in values_
+// (the first index seen for a given value is kept).
+// Throws Exception if st is not an AlphabetNumericState.
+void NumericAlphabet::setState(size_t stateIndex, AlphabetState* st) throw (Exception)
+{
+  // A pointer dynamic_cast yields a null pointer on failure (it never throws
+  // std::bad_cast), so the type must be checked explicitly, and before the
+  // base class is modified.
+  AlphabetNumericState* nst = dynamic_cast<AlphabetNumericState*>(st);
+  if (!nst)
+    throw Exception("NumericAlphabet::setState. Incorrect alphabet type.");
+
+  AbstractAlphabet::setState(stateIndex, st);
+
+  const double x = nst->getValue();
+  if (values_.find(x) == values_.end())
+    values_[x] = stateIndex;
+}
+
+// Appends a state to the alphabet and records its value in values_ (the first
+// index seen for a given value is kept).
+// Throws Exception if st is not an AlphabetNumericState.
+void NumericAlphabet::registerState(AlphabetState* st) throw (Exception)
+{
+  // A pointer dynamic_cast yields a null pointer on failure (it never throws
+  // std::bad_cast), so the type must be checked explicitly, and before the
+  // base class is modified.
+  AlphabetNumericState* nst = dynamic_cast<AlphabetNumericState*>(st);
+  if (!nst)
+    throw Exception("NumericAlphabet::registerState. Incorrect alphabet type.");
+
+  AbstractAlphabet::registerState(st);
+
+  const double x = nst->getValue();
+  if (values_.find(x) == values_.end())
+  {
+    // Read the size *before* operator[] inserts the key: in
+    // "values_[x] = getSize()" the operand order is unspecified before C++17,
+    // so the stored index could be off by one.
+    const size_t index = getSize();
+    values_[x] = index;
+  }
+}
+
+
+// A numeric state has no alias other than itself: returns a one-element
+// vector holding the given state.
+vector<int> NumericAlphabet::getAlias(int state) const throw (BadIntException)
+{
+  return vector<int>(1, state);
+}
+
+// A numeric state has no alias other than itself: returns a one-element
+// vector holding the given state description.
+vector<string> NumericAlphabet::getAlias(const string& state) const throw (BadCharException)
+{
+  return vector<string>(1, state);
+}
+
+/****************************************************************************************/
+
+// Always false: this implementation never reports a state as a gap.
+bool NumericAlphabet::isGap(int /*state*/) const
+{
+  return false;
+}
+
+// Always false: the argument is not inspected.
+bool NumericAlphabet::containsGap(const string& /*state*/) const throw (BadCharException)
+{
+  return false;
+}
+
+// Always false: this implementation never reports a state as unresolved.
+bool NumericAlphabet::isUnresolved(const string& /*state*/) const
+{
+  return false;
+}
+
+// Always false: this implementation never reports a state as unresolved.
+bool NumericAlphabet::isUnresolved(int /*state*/) const
+{
+  return false;
+}
+
+// The size is the number of distinct values currently recorded in values_.
+unsigned int NumericAlphabet::getSize() const
+{
+  const size_t nbValues = values_.size();
+  return static_cast<unsigned int>(nbValues);
+}
+
+// Same figure as getSize(): the number of distinct values recorded in values_.
+unsigned int NumericAlphabet::getNumberOfTypes() const
+{
+  const size_t nbValues = values_.size();
+  return static_cast<unsigned int>(nbValues);
+}
+
+// Rebuilds the lookup tables: lets AbstractAlphabet refresh its own maps, then
+// refills values_ from the registered states (first position wins when two
+// states carry the same value).
+void NumericAlphabet::remap()
+{
+  AbstractAlphabet::remap();
+  values_.clear();
+
+  // getSize() returns values_.size(), which is 0 right after clear(); using
+  // it as the loop bound would leave the map empty. Walk the state list
+  // itself instead.
+  const size_t nbStates = getNumberOfChars();
+  for (size_t pos = 0; pos < nbStates; ++pos)
+  {
+    const double x = getStateAt(pos).getValue();
+    if (values_.find(x) == values_.end())
+      values_[x] = pos;
+  }
+}
+
+/************************************************************/
+
+// Width of one category: (upper bound - lower bound) of the distribution
+// divided by its number of categories.
+double NumericAlphabet::getDelta() const
+{
+  const double span = pdd_->getUpperBound() - pdd_->getLowerBound();
+  const double nbCategories = static_cast<double>(pdd_->getNumberOfCategories());
+  return span / nbCategories;
+}
+
+// Returns the numeric value carried by the state whose code is `state`.
+// getState() is relied upon to reject unknown codes.
+double NumericAlphabet::intToValue(int state) const throw (BadIntException)
+{
+  const AlphabetNumericState& numState = static_cast<const AlphabetNumericState&>(getState(state));
+  return numState.getValue();
+}
+
+// Maps a value to the index recorded for its category: the distribution
+// converts `value` to a category value, which is then looked up in values_.
+// Throws Exception when no state is recorded for that category (the lookup
+// result was previously dereferenced unchecked, which is undefined behaviour
+// on a miss).
+size_t NumericAlphabet::getValueIndex(double value) const
+{
+  const double category = pdd_->getValueCategory(value);
+  map<double, size_t>::const_iterator it = values_.find(category);
+  if (it == values_.end())
+    throw Exception("NumericAlphabet::getValueIndex. No state registered for value " + TextTools::toString(value) + ".");
+  return it->second;
+}
+
+// Fetches the state at stateIndex from AbstractAlphabet and returns it as an
+// AlphabetNumericState (unchecked static downcast).
+AlphabetNumericState& NumericAlphabet::getStateAt(size_t stateIndex) throw (IndexOutOfBoundsException)
+{
+  AlphabetState& base = AbstractAlphabet::getStateAt(stateIndex);
+  return static_cast<AlphabetNumericState&>(base);
+}
+
+// Const counterpart: fetches the state at stateIndex from AbstractAlphabet and
+// returns it as an AlphabetNumericState (unchecked static downcast).
+const AlphabetNumericState& NumericAlphabet::getStateAt(size_t stateIndex) const throw (IndexOutOfBoundsException)
+{
+  const AlphabetState& base = AbstractAlphabet::getStateAt(stateIndex);
+  return static_cast<const AlphabetNumericState&>(base);
+}
diff --git a/src/Bpp/Seq/Alphabet/NumericAlphabet.h b/src/Bpp/Seq/Alphabet/NumericAlphabet.h
new file mode 100644
index 0000000..54ea5f9
--- /dev/null
+++ b/src/Bpp/Seq/Alphabet/NumericAlphabet.h
@@ -0,0 +1,131 @@
+//
+// File: NumericAlphabet.h
+// Created by: Laurent Gueguen
+// Created on: March 2010
+//
+
+/*
+ Copyright or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided
+ only with a limited warranty and the software's author, the holder of
+ the economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards
+ their requirements in conditions enabling the security of their
+ systems and/or data to be ensured and, more generally, to use and
+ operate it in the same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+*/
+
+
+#ifndef _NUMERICALPHABET_H_
+#define _NUMERICALPHABET_H_
+
+#include "AbstractAlphabet.h"
+#include "AlphabetNumericState.h"
+
+#include <Bpp/Numeric/Prob/UniformDiscreteDistribution.h>
+
+#include <string>
+
+/**
+ * @brief This alphabet is used to deal with numeric states.
+ * @author Laurent Guéguen
+ */
+
+namespace bpp
+{
+ class NumericAlphabet :
+   public AbstractAlphabet
+ {
+ private:
+   /** Distribution describing the categories; deleted by the destructor. */
+   const UniformDiscreteDistribution* pdd_;
+
+   /** Lookup table from a state value to a state index. */
+   std::map<double, size_t> values_;
+
+ public:
+   NumericAlphabet(const UniformDiscreteDistribution&);
+
+   virtual ~NumericAlphabet() { delete pdd_; }
+
+   NumericAlphabet(const NumericAlphabet&);
+
+   NumericAlphabet& operator=(const NumericAlphabet&);
+
+   NumericAlphabet* clone() const { return new NumericAlphabet(*this); }
+
+ public:
+   /**
+    * @name State handling and Alphabet queries
+    * @{
+    */
+   void setState(size_t pos, AlphabetState* st) throw (Exception);
+   void registerState(AlphabetState* st) throw (Exception);
+
+   bool containsGap(const std::string& state) const throw (BadCharException);
+
+   unsigned int getSize() const;
+   unsigned int getNumberOfTypes() const;
+   int getUnknownCharacterCode() const { return -1; }
+   bool isGap(int state) const;
+   std::vector<int> getAlias(int state) const throw (BadIntException);
+   std::vector<std::string> getAlias(const std::string& state) const throw (BadCharException);
+   bool isUnresolved(int state) const;
+   bool isUnresolved(const std::string& state) const;
+
+   std::string getAlphabetType() const { return "Numeric alphabet"; }
+
+   AlphabetNumericState& getStateAt(size_t stateIndex) throw (IndexOutOfBoundsException);
+   const AlphabetNumericState& getStateAt(size_t stateIndex) const throw (IndexOutOfBoundsException);
+   /** @} */
+
+   /**
+    * @name Specific methods
+    * @{
+    */
+
+   /**
+    * @brief Returns the difference between successive values.
+    */
+   double getDelta() const;
+
+   /**
+    * @brief Returns the value for the character number.
+    */
+   double intToValue(int state) const throw (BadIntException);
+
+   /**
+    * @brief Returns the CategoryIndex of the category to which the value belongs.
+    */
+   size_t getValueIndex(double value) const;
+
+   /**
+    * @brief Re-update the maps.
+    */
+   void remap();
+   /** @} */
+ };
+}
+#endif // _NUMERICALPHABET_H_
+
diff --git a/src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp b/src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp
index 003299e..d400680 100644
--- a/src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/ProteicAlphabet.cpp
@@ -7,37 +7,37 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
#include "ProteicAlphabet.h"
#include "ProteicAlphabetState.h"
@@ -54,112 +54,136 @@ using namespace std;
ProteicAlphabet::ProteicAlphabet()
{
- // Alphabet size definition
- resize(28);
-
- // Alphabet content definition
- setState( 0, ProteicAlphabetState(-1, "-", "GAP", "Gap"));
- setState( 1, ProteicAlphabetState( 0, "A", "ALA", "Alanine"));
- setState( 2, ProteicAlphabetState( 1, "R", "ARG", "Arginine"));
- setState( 3, ProteicAlphabetState( 2, "N", "ASN", "Asparagine"));
- setState( 4, ProteicAlphabetState( 3, "D", "ASP", "Asparatic Acid"));
- setState( 5, ProteicAlphabetState( 4, "C", "CYS", "Cysteine"));
- setState( 6, ProteicAlphabetState( 5, "Q", "GLN", "Glutamine"));
- setState( 7, ProteicAlphabetState( 6, "E", "GLU", "Glutamic acid"));
- setState( 8, ProteicAlphabetState( 7, "G", "GLY", "Glycine"));
- setState( 9, ProteicAlphabetState( 8, "H", "HIS", "Histidine"));
- setState(10, ProteicAlphabetState( 9, "I", "ILE", "Isoleucine"));
- setState(11, ProteicAlphabetState(10, "L", "LEU", "Leucine"));
- setState(12, ProteicAlphabetState(11, "K", "LYS", "Lysine"));
- setState(13, ProteicAlphabetState(12, "M", "MET", "Methionine"));
- setState(14, ProteicAlphabetState(13, "F", "PHE", "Phenylalanine"));
- setState(15, ProteicAlphabetState(14, "P", "PRO", "Proline"));
- setState(16, ProteicAlphabetState(15, "S", "SER", "Serine"));
- setState(17, ProteicAlphabetState(16, "T", "THR", "Threonine"));
- setState(18, ProteicAlphabetState(17, "W", "TRP", "Tryptophan"));
- setState(19, ProteicAlphabetState(18, "Y", "TYR", "Tyrosine"));
- setState(20, ProteicAlphabetState(19, "V", "VAL", "Valine"));
- setState(21, ProteicAlphabetState(20, "B", "B", "N or D"));
- setState(22, ProteicAlphabetState(21, "Z", "Z", "Q or E"));
- setState(23, ProteicAlphabetState(22, "X", "X", "Unresolved amino acid"));
- setState(24, ProteicAlphabetState(22, "O", "O", "Unresolved amino acid"));
- setState(25, ProteicAlphabetState(22, "0", "0", "Unresolved amino acid"));
- setState(26, ProteicAlphabetState(22, "?", "?", "Unresolved amino acid"));
- setState(27, ProteicAlphabetState(-2, "*", "STOP", "Stop"));
+ // Alphabet content definition
+ registerState(new ProteicAlphabetState(-1, "-", "GAP", "Gap"));
+ registerState(new ProteicAlphabetState( 0, "A", "ALA", "Alanine"));
+ registerState(new ProteicAlphabetState( 1, "R", "ARG", "Arginine"));
+ registerState(new ProteicAlphabetState( 2, "N", "ASN", "Asparagine"));
+ registerState(new ProteicAlphabetState( 3, "D", "ASP", "Asparatic Acid"));
+ registerState(new ProteicAlphabetState( 4, "C", "CYS", "Cysteine"));
+ registerState(new ProteicAlphabetState( 5, "Q", "GLN", "Glutamine"));
+ registerState(new ProteicAlphabetState( 6, "E", "GLU", "Glutamic acid"));
+ registerState(new ProteicAlphabetState( 7, "G", "GLY", "Glycine"));
+ registerState(new ProteicAlphabetState( 8, "H", "HIS", "Histidine"));
+ registerState(new ProteicAlphabetState( 9, "I", "ILE", "Isoleucine"));
+ registerState(new ProteicAlphabetState(10, "L", "LEU", "Leucine"));
+ registerState(new ProteicAlphabetState(11, "K", "LYS", "Lysine"));
+ registerState(new ProteicAlphabetState(12, "M", "MET", "Methionine"));
+ registerState(new ProteicAlphabetState(13, "F", "PHE", "Phenylalanine"));
+ registerState(new ProteicAlphabetState(14, "P", "PRO", "Proline"));
+ registerState(new ProteicAlphabetState(15, "S", "SER", "Serine"));
+ registerState(new ProteicAlphabetState(16, "T", "THR", "Threonine"));
+ registerState(new ProteicAlphabetState(17, "W", "TRP", "Tryptophan"));
+ registerState(new ProteicAlphabetState(18, "Y", "TYR", "Tyrosine"));
+ registerState(new ProteicAlphabetState(19, "V", "VAL", "Valine"));
+ registerState(new ProteicAlphabetState(20, "B", "B", "N or D"));
+ registerState(new ProteicAlphabetState(21, "Z", "Z", "Q or E"));
+ registerState(new ProteicAlphabetState(22, "X", "X", "Unresolved amino acid"));
+ registerState(new ProteicAlphabetState(22, "O", "O", "Unresolved amino acid"));
+ registerState(new ProteicAlphabetState(22, "0", "0", "Unresolved amino acid"));
+ registerState(new ProteicAlphabetState(22, "?", "?", "Unresolved amino acid"));
+ registerState(new ProteicAlphabetState(-2, "*", "STOP", "Stop"));
}
/******************************************************************************/
string ProteicAlphabet::getAbbr(const string& aa) const throw (AlphabetException)
{
- string AA = TextTools::toUpper(aa);
- return (getState(aa).getAbbreviation());
+  // Look the state up with the upper-cased letter: the copy was computed but
+  // the original string was still being passed to getState().
+  string AA = TextTools::toUpper(aa);
+  return getState(AA).getAbbreviation();
}
/******************************************************************************/
string ProteicAlphabet::getAbbr(int aa) const throw (AlphabetException)
{
- return (getState(aa).getAbbreviation());
+ return getState(aa).getAbbreviation();
}
/******************************************************************************/
-vector<int> ProteicAlphabet::getAlias(int state) const throw (BadIntException)
+vector<int> ProteicAlphabet::getAlias(int state) const throw (BadIntException)
{
- if(!isIntInAlphabet(state)) throw BadIntException(state, "ProteicAlphabet::getAlias(int): Specified base unknown.");
- vector<int> v;
- if(state == 20) {// N or D
- v.resize(2); v[0] = 2; v[1] = 3;
- } else if(state == 21) {// Q or E
- v.resize(2); v[0] = 5; v[1] = 6;
- } else if(state == 22) {// all!
- v.resize(20);
- for(unsigned int i = 0; i < 20; i++) v[i] = i;
- } else {
- v.resize(1); v[0] = state;
- }
- return v;
+ if (!isIntInAlphabet(state))
+ throw BadIntException(state, "ProteicAlphabet::getAlias(int): Specified base unknown.");
+ vector<int> v;
+ if (state == 20) // N or D
+ {
+ v.resize(2); v[0] = 2; v[1] = 3;
+ }
+ else if (state == 21) // Q or E
+ {
+ v.resize(2); v[0] = 5; v[1] = 6;
+ }
+ else if (state == 22) // all!
+ {
+ v.resize(20);
+ for (size_t i = 0; i < 20; i++)
+ {
+ v[i] = static_cast<int>(i);
+ }
+ }
+ else
+ {
+ v.resize(1); v[0] = state;
+ }
+ return v;
}
/******************************************************************************/
-vector<string> ProteicAlphabet::getAlias(const string & state) const throw (BadCharException)
+vector<string> ProteicAlphabet::getAlias(const string& state) const throw (BadCharException)
{
string locstate = TextTools::toUpper(state);
- if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "ProteicAlphabet::getAlias(int): Specified base unknown.");
- vector<string> v;
- if(locstate == "B") {// N or D
- v.resize(2); v[0] = "N"; v[1] = "D";
- } else if(locstate == "Z") {// Q or E
- v.resize(2); v[0] = "Q"; v[1] = "E";
- } else if(locstate == "X"
- || locstate == "O"
- || locstate == "0"
- || locstate == "?") {// all!
- v.resize(20);
- for(unsigned int i = 0; i < 20; i++) v[i] = getState(i).getLetter();
- } else {
- v.resize(1); v[0] = locstate;
- }
- return v;
+ if (!isCharInAlphabet(locstate))
+ throw BadCharException(locstate, "ProteicAlphabet::getAlias(int): Specified base unknown.");
+ vector<string> v;
+ if (locstate == "B") // N or D
+ {
+ v.resize(2); v[0] = "N"; v[1] = "D";
+ }
+ else if (locstate == "Z") // Q or E
+ {
+ v.resize(2); v[0] = "Q"; v[1] = "E";
+ }
+ else if (locstate == "X"
+ || locstate == "O"
+ || locstate == "0"
+ || locstate == "?") // all!
+ {
+ v.resize(20);
+ for (int i = 0; i < 20; i++)
+ {
+ v[static_cast<size_t>(i)] = getState(i).getLetter();
+ }
+ }
+ else
+ {
+ v.resize(1); v[0] = locstate;
+ }
+ return v;
}
/******************************************************************************/
-int ProteicAlphabet::getGeneric(const vector<int> & states) const throw (BadIntException) {
+int ProteicAlphabet::getGeneric(const vector<int>& states) const throw (BadIntException)
+{
map<int, int> m;
- for (unsigned int i = 0 ; i < states.size() ; ++i) {
+ for (unsigned int i = 0; i < states.size(); ++i)
+ {
vector<int> tmp_s = this->getAlias(states[i]); // get the states for generic characters
- for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) {
- m[tmp_s[j]] ++; // add each state to the list
+ for (unsigned int j = 0; j < tmp_s.size(); ++j)
+ {
+ m[tmp_s[j]]++; // add each state to the list
}
}
vector<int> ve = MapTools::getKeys(m);
string key;
- for (unsigned int i = 0 ; i < ve.size() ; ++i) {
- if (!isIntInAlphabet(ve[i])) throw BadIntException(ve[i], "ProteicAlphabet::getGeneric(const vector<int>): Specified base unknown.");
+ for (unsigned int i = 0; i < ve.size(); ++i)
+ {
+ if (!isIntInAlphabet(ve[i]))
+ throw BadIntException(ve[i], "ProteicAlphabet::getGeneric(const vector<int>): Specified base unknown.");
key += "_" + TextTools::toString(ve[i]);
}
map<string, int> g;
@@ -167,11 +191,16 @@ int ProteicAlphabet::getGeneric(const vector<int> & states) const throw (BadIntE
g["_5_6"] = 21;
int v;
map<string, int>::iterator it = g.find(key);
- if (ve.size() == 1) {
+ if (ve.size() == 1)
+ {
v = ve[0];
- } else if (it != g.end()) {
+ }
+ else if (it != g.end())
+ {
v = it->second;
- } else {
+ }
+ else
+ {
v = 22;
}
return v;
@@ -179,19 +208,24 @@ int ProteicAlphabet::getGeneric(const vector<int> & states) const throw (BadIntE
/******************************************************************************/
-string ProteicAlphabet::getGeneric(const vector<string> & states) const throw (BadCharException) {
- map <string, int> m;
- for (unsigned int i = 0 ; i < states.size() ; ++i) {
+string ProteicAlphabet::getGeneric(const vector<string>& states) const throw (BadCharException)
+{
+ map<string, int> m;
+ for (unsigned int i = 0; i < states.size(); ++i)
+ {
vector<string> tmp_s = this->getAlias(states[i]); // get the states for generic characters
- for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) {
- m[tmp_s[j]] ++; // add each state to the list
+ for (unsigned int j = 0; j < tmp_s.size(); ++j)
+ {
+ m[tmp_s[j]]++; // add each state to the list
}
}
vector<string> ve = MapTools::getKeys(m);
string key;
- for (unsigned int i = 0 ; i < ve.size() ; ++i) {
- if (!isCharInAlphabet(ve[i])) throw BadCharException(ve[i], "ProteicAlphabet::getAlias(const vector<string>): Specified base unknown.");
+ for (unsigned int i = 0; i < ve.size(); ++i)
+ {
+ if (!isCharInAlphabet(ve[i]))
+ throw BadCharException(ve[i], "ProteicAlphabet::getAlias(const vector<string>): Specified base unknown.");
key += TextTools::toString(ve[i]);
}
map<string, string> g;
@@ -199,11 +233,16 @@ string ProteicAlphabet::getGeneric(const vector<string> & states) const throw (B
g["EQ"] = "Z";
string v;
map<string, string>::iterator it = g.find(key);
- if (ve.size() == 1) {
+ if (ve.size() == 1)
+ {
v = ve[0];
- } else if (it != g.end()) {
+ }
+ else if (it != g.end())
+ {
v = it->second;
- } else {
+ }
+ else
+ {
v = "?";
}
return v;
diff --git a/src/Bpp/Seq/Alphabet/ProteicAlphabet.h b/src/Bpp/Seq/Alphabet/ProteicAlphabet.h
index 48d2071..98274ac 100644
--- a/src/Bpp/Seq/Alphabet/ProteicAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/ProteicAlphabet.h
@@ -7,36 +7,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
@@ -56,47 +56,72 @@ namespace bpp
* Gaps are coded by '-', unresolved characters are coded by 'X'.
*/
-class ProteicAlphabet:
- public LetterAlphabet
-{
+ class ProteicAlphabet:
+ public LetterAlphabet
+ {
/**
* @name Overloaded methods from AbstractAlphabet
* @{
*/
public:
const ProteicAlphabetState& getState(const std::string& letter) const
- throw (BadCharException) {
- return dynamic_cast<const ProteicAlphabetState&>(
- AbstractAlphabet::getState(letter)
- );
- }
+ throw (BadCharException)
+ {
+ return dynamic_cast<const ProteicAlphabetState&>(
+ AbstractAlphabet::getState(letter)
+ );
+ }
+
const ProteicAlphabetState& getState(int num) const
- throw (BadIntException) {
- return dynamic_cast<const ProteicAlphabetState&>(
- AbstractAlphabet::getState(num)
- );
- }
+ throw (BadIntException)
+ {
+ return dynamic_cast<const ProteicAlphabetState&>(
+ AbstractAlphabet::getState(num)
+ );
+ }
+
protected:
- const ProteicAlphabetState& getStateAt(unsigned int pos) const
- throw (IndexOutOfBoundsException) {
- return dynamic_cast<const ProteicAlphabetState&>(
- AbstractAlphabet::getStateAt(pos)
- );
- }
- ProteicAlphabetState& getStateAt(unsigned int pos)
- throw (IndexOutOfBoundsException) {
- return dynamic_cast<ProteicAlphabetState&>(
- AbstractAlphabet::getStateAt(pos)
- );
- }
+
+ const ProteicAlphabetState& getStateAt(size_t pos) const
+ throw (IndexOutOfBoundsException)
+ {
+ return dynamic_cast<const ProteicAlphabetState&>(
+ AbstractAlphabet::getStateAt(pos)
+ );
+ }
+
+ ProteicAlphabetState& getStateAt(size_t pos)
+ throw (IndexOutOfBoundsException)
+ {
+ return dynamic_cast<ProteicAlphabetState&>(
+ AbstractAlphabet::getStateAt(pos)
+ );
+ }
+
/** @} */
- public:
- ProteicAlphabet();
- virtual ~ProteicAlphabet() {}
-
- public:
- unsigned int getSize() const { return 20; }
- unsigned int getNumberOfTypes() const { return 23; }
+ public:
+ ProteicAlphabet();
+
+ ProteicAlphabet(const ProteicAlphabet& bia) : LetterAlphabet(bia) {}
+
+ ProteicAlphabet& operator=(const ProteicAlphabet& bia)
+ {
+ LetterAlphabet::operator=(bia);
+ return *this;
+ }
+
+ ProteicAlphabet* clone() const
+ {
+ return new ProteicAlphabet(*this);
+ }
+
+
+ virtual ~ProteicAlphabet() {}
+
+
+ public:
+ unsigned int getSize() const { return 20; }
+ unsigned int getNumberOfTypes() const { return 23; }
int getUnknownCharacterCode() const { return 22; }
std::vector<int> getAlias(int state) const throw (BadIntException);
std::vector<std::string> getAlias(const std::string& state) const throw (BadCharException);
@@ -106,30 +131,30 @@ class ProteicAlphabet:
bool isUnresolved(const std::string& state) const { return charToInt(state) > 19; }
std::string getAlphabetType() const { return "Proteic alphabet"; }
- public:
+ public:
- /**
- * @name Specific methods
- *
- * @{
- */
+ /**
+ * @name Specific methods
+ *
+ * @{
+ */
- /**
- * @brief Get the abbreviation (3 letter code) for a state coded as char.
- *
- * @param aa Char description of the amino-acid to analyse.
- */
+ /**
+ * @brief Get the abbreviation (3 letter code) for a state coded as char.
+ *
+ * @param aa Char description of the amino-acid to analyse.
+ */
std::string getAbbr(const std::string & aa) const throw (AlphabetException);
- /**
- * @brief Get the abbreviation (3 letter code) for a state coded as int.
- *
- * @param aa Int description of the amino-acid to analyse.
- */
+ /**
+ * @brief Get the abbreviation (3 letter code) for a state coded as int.
+ *
+ * @param aa Int description of the amino-acid to analyse.
+ */
std::string getAbbr(int aa) const throw (AlphabetException);
- /** @} */
+ /** @} */
-};
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/RNA.cpp b/src/Bpp/Seq/Alphabet/RNA.cpp
index 53f065e..e117b0b 100644
--- a/src/Bpp/Seq/Alphabet/RNA.cpp
+++ b/src/Bpp/Seq/Alphabet/RNA.cpp
@@ -54,35 +54,32 @@ using namespace std;
// class constructor
RNA::RNA(bool exclamationMarkCountsAsGap)
{
- // Alphabet size definition
- resize(21);
-
// Alphabet content definition
// all unresolved bases use n°14
- setState( 0, NucleicAlphabetState(-1, "-", 0, "Gap"));
- setState( 1, NucleicAlphabetState( 0, "A", 1, "Adenine"));
- setState( 2, NucleicAlphabetState( 1, "C", 2, "Cytosine"));
- setState( 3, NucleicAlphabetState( 2, "G", 4, "Guanine"));
- setState( 4, NucleicAlphabetState( 3, "U", 8, "Uracile"));
- setState( 5, NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
- setState( 6, NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
- setState( 7, NucleicAlphabetState( 6, "W", 9, "Adenine or Uracile"));
- setState( 8, NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
- setState( 9, NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Uracile)"));
- setState(10, NucleicAlphabetState( 9, "K", 12, "Guanine or Uracile"));
- setState(11, NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
- setState(12, NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Uracile"));
- setState(13, NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Uracile"));
- setState(14, NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Uracile"));
- setState(15, NucleicAlphabetState(14, "N", 15, "Unresolved base"));
- setState(16, NucleicAlphabetState(14, "X", 15, "Unresolved base"));
- setState(17, NucleicAlphabetState(14, "O", 15, "Unresolved base"));
- setState(18, NucleicAlphabetState(14, "0", 15, "Unresolved base"));
- setState(19, NucleicAlphabetState(14, "?", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
+ registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
+ registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
+ registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
+ registerState(new NucleicAlphabetState( 3, "U", 8, "Uracile"));
+ registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
+ registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
+ registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Uracile"));
+ registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
+ registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Uracile)"));
+ registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Uracile"));
+ registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
+ registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Uracile"));
+ registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Uracile"));
+ registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Uracile"));
+ registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
if (exclamationMarkCountsAsGap)
- setState(20, NucleicAlphabetState(-1, "!", 0, "Unresolved base"));
+ registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
else
- setState(20, NucleicAlphabetState(14, "!", 15, "Unresolved base"));
+ registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Alphabet/RNA.h b/src/Bpp/Seq/Alphabet/RNA.h
index 068bde5..7fe56ef 100644
--- a/src/Bpp/Seq/Alphabet/RNA.h
+++ b/src/Bpp/Seq/Alphabet/RNA.h
@@ -5,36 +5,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _RNA_H_
@@ -55,24 +55,38 @@ namespace bpp
* Gaps are coded by '-', unresolved characters are coded by 'X, N, O, 0 or ?'.
* Extensive support for generic characters (e.g. 'P', 'Y', etc.) is provided.
*/
-class RNA:
- public NucleicAlphabet
-{
- public:
+ class RNA:
+ public NucleicAlphabet
+ {
+ public:
/**
* @param exclamationMarkCountsAsGap If yes, '!' characters are replaced by gaps.
* Otherwise, they are counted as unknown characters.
*/
- RNA(bool exclamationMarkCountsAsGap = false);
- virtual ~RNA() {}
+ RNA(bool exclamationMarkCountsAsGap = false);
+
+ RNA(const RNA& bia) : NucleicAlphabet(bia) {}
+
+ RNA& operator=(const RNA& bia)
+ {
+ NucleicAlphabet::operator=(bia);
+ return *this;
+ }
+
+ RNA* clone() const
+ {
+ return new RNA(*this);
+ }
+
+ virtual ~RNA() {}
- public:
+ public:
std::vector<int> getAlias(int state) const throw (BadIntException);
std::vector<std::string> getAlias(const std::string & state) const throw (BadCharException);
int getGeneric(const std::vector<int> & states) const throw (BadIntException);
std::string getGeneric(const std::vector<std::string> & states) const throw (BadCharException);
std::string getAlphabetType() const { return "RNA alphabet"; }
-};
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Alphabet/RNY.cpp b/src/Bpp/Seq/Alphabet/RNY.cpp
index 0df099d..8088897 100644
--- a/src/Bpp/Seq/Alphabet/RNY.cpp
+++ b/src/Bpp/Seq/Alphabet/RNY.cpp
@@ -1,515 +1,533 @@
-//
-// File: RNY.cpp
-// Created by: Laurent Gueguen
-// Created on: Tue Jul 31 2007
-//
-
-/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
-
- This software is a computer program whose purpose is to provide
- classes for sequences analysis.
-
- This software is governed by the CeCILL license under French law and
- abiding by the rules of distribution of free software. You can use,
- modify and/ or redistribute the software under the terms of the CeCILL
- license as circulated by CEA, CNRS and INRIA at the following URL
- "http://www.cecill.info".
-
- As a counterpart to the access to the source code and rights to copy,
- modify and redistribute granted by the license, users are provided
- only with a limited warranty and the software's author, the holder of
- the economic rights, and the successive licensors have only limited
- liability.
-
- In this respect, the user's attention is drawn to the risks associated
- with loading, using, modifying and/or developing or reproducing the
- software by the user in light of its specific status of free software,
- that may mean that it is complicated to manipulate, and that also
- therefore means that it is reserved for developers and experienced
- professionals having in-depth computer knowledge. Users are therefore
- encouraged to load and test the software's suitability as regards
- their requirements in conditions enabling the security of their
- systems and/or data to be ensured and, more generally, to use and
- operate it in the same conditions as regards security.
-
- The fact that you are presently reading this means that you have had
- knowledge of the CeCILL license and that you accept its terms.
- */
-
-#include "RNY.h" // class's header file
-
-// From Utils:
-#include <Bpp/Text/TextTools.h>
-
-#include <iostream>
-using namespace std;
-using namespace bpp;
-
-/****************************************************************************************/
-
-RNY::RNY(const NucleicAlphabet& na) : nuclalph_(na)
-{
- // Alphabet size definition
- resize(351);
- int i,j,k,l;
-
- for (i = 0; i < 351; i++)
- {
- setState(i,AlphabetState(i,"",""));
- }
-
- // Alphabet content definition
-
- // / changer pour ARN
-
- string s1 = "RCT-";
- string s2 = "AGCT-";
- string s3 = "AGY-";
- string s = " ";
-
-
- // NNN (0->35)
-
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- l = i * 12 + j * 3 + k;
- s[0] = s1[i];
- s[1] = s2[j];
- s[2] = s3[k];
- setState(l, AlphabetState(l,s,s));
- }
- }
- }
-
- // NN- (50->83)
-
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- l = 50 + 12 * i + j * 3;
- s[0] = s1[i];
- s[1] = s2[j];
- s[2] = s3[3];
- setState(l, AlphabetState(l,s,s));
- }
- }
-
- // N-N (100->126)
-
- for (i = 0; i < 3; i++)
- {
- for (k = 0; k < 3; k++)
- {
- l = 100 + 12 * i + k;
- s[0] = s1[i];
- s[1] = s2[4];
- s[2] = s3[k];
- setState(l, AlphabetState(l,s,s));
- }
- }
-
- // N-- (150->152)
-
- for (i = 0; i < 3; i++)
- {
- l = 150 + 12 * i;
- s[0] = s1[i];
- s[1] = s2[4];
- s[2] = s3[3];
- setState(l, AlphabetState(l,s,s));
- }
-
- // -NN (200->211)
-
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- l = 200 + j * 3 + k;
- s[0] = s1[3];
- s[1] = s2[j];
- s[2] = s3[k];
- setState(l, AlphabetState(l,s,s));
- }
- }
-
-
- // -N- (250->253)
-
- for (j = 0; j < 4; j++)
- {
- l = 250 + 3 * j;
- s[0] = s1[3];
- s[1] = s2[j];
- s[2] = s3[3];
- setState(l, AlphabetState(l,s,s));
- }
-
- // --N (300->302)
-
- for (k = 0; k < 3; k++)
- {
- l = 300 + k;
- s[0] = s1[3];
- s[1] = s2[4];
- s[2] = s3[k];
- setState(l, AlphabetState(l,s,s));
- }
-
-
- // --- (350)
-
- l = 350;
- s[0] = s1[3];
- s[1] = s2[4];
- s[2] = s3[3];
- setState(l, AlphabetState(l,s,s));
-}
-
-/****************************************************************************************/
-
-vector<int> RNY::getAlias(int state) const throw (BadIntException)
-{
- if (!isIntInAlphabet(state)) throw BadIntException(state, "RNY::getAlias(int): Specified base unknown.");
- vector<int> v;
-
- int qs = state / 50;
- int rs = state % 50;
- int i,j,k;
-
- switch (qs)
- {
- case 0: // NNN
- v.resize(1);
- v[0] = rs;
- break;
- case 1: // NN-
- v.resize(3);
- for (k = 0; k < 3; k++)
- {
- v[k] = k + rs;
- }
- break;
- case 2: // N-N
- v.resize(4);
- for (j = 0; j < 4; j++)
- {
- v[j] = 3 * j + rs;
- }
- break;
- case 3: // N--
- v.resize(12);
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- v[3 * j + k] = rs + 3 * j + k;
- }
- }
- break;
- case 4: // -NN
- v.resize(3);
- for (i = 0; i < 3; i++)
- {
- v[i] = 12 * i + rs;
- }
- break;
- case 5: // -N-
- v.resize(9);
- for (i = 0; i < 3; i++)
- {
- for (k = 0; k < 3; k++)
- {
- v[3 * i + k] = rs + 12 * i + k;
- }
- }
- break;
- case 6: // --N
- v.resize(12);
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- v[4 * i + j] = rs + 12 * i + 3 * j;
- }
- }
- break;
- case 7: // ---
- v.resize(36);
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- v[12 * i + 3 * j + k] = 12 * i + 3 * j + k;
- }
- }
- }
- break;
- }
- return v;
-}
-
-const NucleicAlphabet& RNY::getLetterAlphabet() const
-{
- return nuclalph_;
-}
-
-/****************************************************************************************/
-
-vector<string> RNY::getAlias(const string& state) const throw (BadCharException)
-{
- if (!isCharInAlphabet(state)) throw BadCharException(state, "RNY::getAlias(int): Specified base unknown.");
-
- vector<int> v = getAlias(charToInt(state));
- vector<string> s;
- size_t size = v.size();
- s.resize(size);
-
- for (size_t i = 0; i < size; i++)
- {
- s[i] = AbstractAlphabet::intToChar(v[i]);
- }
- return s;
-}
-
-/****************************************************************************************/
-
-string RNY::getRNY(const string& pos1, const string& pos2, const string& pos3) const throw (BadCharException)
-{
- string tr;
-
- if (pos1 == "A" || pos1 == "G")
- tr = "R";
- else
- tr = pos1;
-
- tr += pos2;
-
- if (pos3 == "T" || pos3 == "C")
- tr += "Y";
- else
- tr += pos3;
-
- // teste triplet;
- charToInt(tr);
- return tr;
-}
-
-/**************************************************************************************/
-int RNY::getRNY(int i, int j, int k, const Alphabet& alph) const throw (BadCharException)
-{
- if (alph.getAlphabetType() != "DNA alphabet")
- {
- throw AlphabetException ("RNY::getRNY : Sequence must be DNA",
- &alph);
- }
-
- char li = alph.intToChar(i)[0];
- char lj = alph.intToChar(j)[0];
- char lk = alph.intToChar(k)[0];
-
- int r = 0;
- int s = 0;
-
- switch (li)
- {
- case 'A':
- case 'G':
- r += 0;
- break;
- case 'C':
- r += 1;
- break;
- case 'T':
- r += 2;
- break;
- case '-':
- case 'N':
- s += 1;
- break;
- default:
- throw BadCharException(&li, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
- }
-
- r *= 4;
- s *= 2;
-
- switch (lj)
- {
- case 'A':
- r += 0;
- break;
- case 'G':
- r += 1;
- break;
- case 'C':
- r += 2;
- break;
- case 'T':
- r += 3;
- break;
- case '-':
- case 'N':
- s += 1;
- break;
- default:
- throw BadCharException(&lj, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
- }
-
- r *= 3;
- s *= 2;
-
- switch (lk)
- {
- case 'A':
- r += 0;
- break;
- case 'G':
- r += 1;
- break;
- case 'C':
- case 'T':
- r += 2;
- break;
- case '-':
- case 'N':
- s += 1;
- break;
- default:
- throw BadCharException(&lk, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
- }
-
- return 50 * s + r;
-}
-
-/****************************************************************************************/
-bool RNY::isGap(int state) const
-{
- return state==350;
-}
-
-bool RNY::containsGap(const string& state) const throw (BadCharException)
-{
- return (state.find("-")!=string::npos);
-}
-
-bool RNY::isUnresolved(const string& state) const
-{
- return containsGap(state);
-}
-
-bool RNY::isUnresolved(int state) const
-{
- return (state>=50 && state!=350);
-}
-
-/****************************************************************************************/
-
-int RNY::charToInt(const string& state) const throw (BadCharException)
-{
- if (state.size() != 3) throw BadCharException(state, "RNY::charToInt", this);
- else
- return AbstractAlphabet::charToInt(state);
-}
-
-
-/************************************************************/
-
-string RNY::intToChar(int state) const throw (BadIntException)
-{
- unsigned int i,j,k,l;
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- l = i * 12 + j * 3 + k;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
- }
- }
-
- // NN- (50->83)
-
- for (i = 0; i < 3; i++)
- {
- for (j = 0; j < 4; j++)
- {
- l = 50 + 12 * i + j * 3;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
- }
-
- // N-N (100->126)
-
- for (i = 0; i < 3; i++)
- {
- for (k = 0; k < 3; k++)
- {
- l = 100 + 12 * i + k;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
- }
-
- // N-- (150->152)
-
- for (i = 0; i < 3; i++)
- {
- l = 150 + 12 * i;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
-
- // -NN (200->211)
-
- for (j = 0; j < 4; j++)
- {
- for (k = 0; k < 3; k++)
- {
- l = 200 + j * 3 + k;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
- }
-
-
- // -N- (250->253)
-
- for (j = 0; j < 4; j++)
- {
- l = 250 + 3 * j;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
-
- // --N (300->302)
-
- for (k = 0; k < 3; k++)
- {
- l = 300 + k;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
- }
-
-
- // --- (350)
-
- l = 350;
- if (getState(l).getNum() == state)
- return getState(l).getLetter();
-
- throw BadIntException(state, "RNY::intToChar: Specified base unknown", this);
- return "XXX";
-}
+//
+// File: RNY.cpp
+// Created by: Laurent Gueguen
+// Created on: Tue Jul 31 2007
+//
+
+/*
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide
+ classes for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided
+ only with a limited warranty and the software's author, the holder of
+ the economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards
+ their requirements in conditions enabling the security of their
+ systems and/or data to be ensured and, more generally, to use and
+ operate it in the same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+ */
+
+#include "RNY.h" // class's header file
+
+// From Utils:
+#include <Bpp/Text/TextTools.h>
+#include "AlphabetTools.h"
+
+#include <iostream>
+using namespace std;
+using namespace bpp;
+
+/****************************************************************************************/
+
+RNY::RNY(const NucleicAlphabet& na) : nuclalph_(na)
+{
+ // Initialization:
+ vector<AlphabetState*> states;
+ for (int i = 0; i < 351; ++i)
+ {
+ states.push_back(new AlphabetState(i, TextTools::toString(i), ""));
+ }
+
+ // Alphabet content definition:
+
+ string s1;
+
+ if (AlphabetTools::isDNAAlphabet(&na))
+ s1 = "RCT-";
+ else
+ s1 = "RCU-";
+
+ string s2;
+
+ if (AlphabetTools::isDNAAlphabet(&na))
+ s2 = "AGCT-";
+ else
+ s2 = "AGCU-";
+
+ string s3 = "AGY-";
+ string s = " ";
+
+
+ // NNN (0->35)
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ for (size_t j = 0; j < 4; ++j)
+ {
+ for (size_t k = 0; k < 3; ++k)
+ {
+ size_t l = i * 12 + j * 3 + k;
+ s[0] = s1[i];
+ s[1] = s2[j];
+ s[2] = s3[k];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+ }
+ }
+
+ // NN- (50->83)
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ for (size_t j = 0; j < 4; ++j)
+ {
+ size_t l = 50 + 12 * i + j * 3;
+ s[0] = s1[i];
+ s[1] = s2[j];
+ s[2] = s3[3];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+ }
+
+ // N-N (100->126)
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ for (size_t k = 0; k < 3; ++k)
+ {
+ size_t l = 100 + 12 * i + k;
+ s[0] = s1[i];
+ s[1] = s2[4];
+ s[2] = s3[k];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+ }
+
+ // N-- (150->152)
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ size_t l = 150 + 12 * i;
+ s[0] = s1[i];
+ s[1] = s2[4];
+ s[2] = s3[3];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+
+ // -NN (200->211)
+
+ for (size_t j = 0; j < 4; ++j)
+ {
+ for (size_t k = 0; k < 3; ++k)
+ {
+ size_t l = 200 + j * 3 + k;
+ s[0] = s1[3];
+ s[1] = s2[j];
+ s[2] = s3[k];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+ }
+
+
+ // -N- (250->253)
+
+ for (size_t j = 0; j < 4; ++j)
+ {
+ size_t l = 250 + 3 * j;
+ s[0] = s1[3];
+ s[1] = s2[j];
+ s[2] = s3[3];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+
+ // --N (300->302)
+
+ for (size_t k = 0; k < 3; ++k)
+ {
+ size_t l = 300 + k;
+ s[0] = s1[3];
+ s[1] = s2[4];
+ s[2] = s3[k];
+ states[l] = new AlphabetState(static_cast<int>(l), s, s);
+ }
+
+
+ // --- (350)
+
+ s[0] = s1[3];
+ s[1] = s2[4];
+ s[2] = s3[3];
+ states[350] = new AlphabetState(350, s, s);
+
+ // Register all states:
+ for (size_t i = 0; i < states.size(); ++i)
+ registerState(states[i]);
+}
+
+/****************************************************************************************/
+
+vector<int> RNY::getAlias(int state) const throw (BadIntException)
+{
+ if (!isIntInAlphabet(state))
+ throw BadIntException(state, "RNY::getAlias(int): Specified base unknown.");
+ vector<int> v;
+
+ int qs = state / 50;
+ int rs = state % 50;
+ int i, j, k;
+
+ switch (qs)
+ {
+ case 0: // NNN
+ v.resize(1);
+ v[0] = rs;
+ break;
+ case 1: // NN-
+ v.resize(3);
+ for (k = 0; k < 3; ++k)
+ {
+ v[static_cast<size_t>(k)] = k + rs;
+ }
+ break;
+ case 2: // N-N
+ v.resize(4);
+ for (j = 0; j < 4; ++j)
+ {
+ v[static_cast<size_t>(j)] = 3 * j + rs;
+ }
+ break;
+ case 3: // N--
+ v.resize(12);
+ for (j = 0; j < 4; ++j)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ v[static_cast<size_t>(3 * j + k)] = rs + 3 * j + k;
+ }
+ }
+ break;
+ case 4: // -NN
+ v.resize(3);
+ for (i = 0; i < 3; ++i)
+ {
+ v[static_cast<size_t>(i)] = 12 * i + rs;
+ }
+ break;
+ case 5: // -N-
+ v.resize(9);
+ for (i = 0; i < 3; ++i)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ v[static_cast<size_t>(3 * i + k)] = rs + 12 * i + k;
+ }
+ }
+ break;
+ case 6: // --N
+ v.resize(12);
+ for (i = 0; i < 3; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ v[static_cast<size_t>(4 * i + j)] = rs + 12 * i + 3 * j;
+ }
+ }
+ break;
+ case 7: // ---
+ v.resize(36);
+ for (i = 0; i < 3; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ v[static_cast<size_t>(12 * i + 3 * j + k)] = 12 * i + 3 * j + k;
+ }
+ }
+ }
+ break;
+ }
+ return v;
+}
+
+const NucleicAlphabet& RNY::getLetterAlphabet() const
+{
+ return nuclalph_;
+}
+
+/****************************************************************************************/
+
+/**
+ * Textual counterpart of getAlias(int): expand a state given by its letters.
+ *
+ * The word is converted with charToInt(), expanded with getAlias(int), and
+ * every resulting state number is converted back to text. The conversion
+ * back uses the explicitly qualified AbstractAlphabet::intToChar(), i.e. it
+ * does not go through RNY::intToChar().
+ *
+ * @param state The state to expand, as text.
+ * @return The textual description of every state the input stands for.
+ * @throw BadCharException if isCharInAlphabet(state) is false.
+ */
+vector<string> RNY::getAlias(const string& state) const throw (BadCharException)
+{
+  // The message names this (string) overload, not the (int) one, so that the
+  // reported origin of the error is correct.
+  if (!isCharInAlphabet(state))
+    throw BadCharException(state, "RNY::getAlias(string): Specified base unknown.");
+
+  const vector<int> codes = getAlias(charToInt(state));
+
+  vector<string> aliases;
+  aliases.reserve(codes.size());
+  for (size_t i = 0; i < codes.size(); ++i)
+  {
+    aliases.push_back(AbstractAlphabet::intToChar(codes[i]));
+  }
+  return aliases;
+}
+
+/****************************************************************************************/
+
+/**
+ * Build the RNY word corresponding to three single-position letters.
+ *
+ * "A" or "G" in first position becomes "R"; "T", "U" or "C" in third
+ * position becomes "Y"; every other letter is copied unchanged.
+ *
+ * @param pos1 Letter at the first position.
+ * @param pos2 Letter at the second position.
+ * @param pos3 Letter at the third position.
+ * @return The three-position word.
+ * @throw BadCharException if charToInt() rejects the resulting word.
+ */
+string RNY::getRNY(const string& pos1, const string& pos2, const string& pos3) const throw (BadCharException)
+{
+  const bool firstIsAorG = (pos1 == "A" || pos1 == "G");
+  string triplet = firstIsAorG ? string("R") : pos1;
+
+  triplet += pos2;
+
+  const bool thirdIsTUorC = (pos3 == "T" || pos3 == "U" || pos3 == "C");
+  triplet += thirdIsTUorC ? string("Y") : pos3;
+
+  // Check the triplet: the call is made only for the exception it may throw,
+  // its result is deliberately ignored.
+  charToInt(triplet);
+  return triplet;
+}
+
+/**************************************************************************************/
+/**
+ * Compute the RNY state number of a triplet given as three state numbers of
+ * a nucleic alphabet.
+ *
+ * Each number is converted to text with alph.intToChar() and only the first
+ * character is looked at. Letters are ranked per position:
+ *  - first:  A/G -> 0, C -> 1, T/U -> 2
+ *  - second: A -> 0, G -> 1, C -> 2, T/U -> 3
+ *  - third:  A -> 0, G -> 1, C/T/U -> 2
+ * giving r = 12 * first + 3 * second + third. A '-' or 'N' contributes 0 to
+ * r and sets a flag instead: s = 4 * flag1 + 2 * flag2 + flag3.
+ *
+ * @param i State number at the first position, expressed in alph.
+ * @param j State number at the second position, expressed in alph.
+ * @param k State number at the third position, expressed in alph.
+ * @param alph The alphabet i, j and k belong to.
+ * @return 50 * s + r.
+ * @throw AlphabetException if AlphabetTools::isNucleicAlphabet(&alph) is false.
+ * @throw BadCharException if a letter is none of A, G, C, T, U, '-' or N.
+ */
+int RNY::getRNY(int i, int j, int k, const Alphabet& alph) const throw (BadCharException)
+{
+  if (!AlphabetTools::isNucleicAlphabet(&alph))
+  {
+    // NOTE(review): the exception specification above lists only
+    // BadCharException; unless AlphabetException is covered by it, this
+    // throw violates the specification -- confirm against the class
+    // hierarchy and the declaration in RNY.h.
+    throw AlphabetException("RNY::getRNY : Sequence must be Nucleic", &alph);
+  }
+
+  const char li = alph.intToChar(i)[0];
+  const char lj = alph.intToChar(j)[0];
+  const char lk = alph.intToChar(k)[0];
+
+  int r = 0; // rank of the resolved positions
+  int s = 0; // one flag bit per '-'/'N' position
+
+  switch (li)
+  {
+  case 'A':
+  case 'G':
+    break; // rank 0
+  case 'C':
+    r += 1;
+    break;
+  case 'T':
+  case 'U':
+    r += 2;
+    break;
+  case '-':
+  case 'N':
+    s += 1;
+    break;
+  default:
+    // Build a one-character string: passing &li would hand a pointer to a
+    // single char to a parameter expecting a null-terminated string.
+    throw BadCharException(string(1, li), "RNY::getRNY(int,int;int,alph): Specified base unknown.");
+  }
+
+  r *= 4;
+  s *= 2;
+
+  switch (lj)
+  {
+  case 'A':
+    break; // rank 0
+  case 'G':
+    r += 1;
+    break;
+  case 'C':
+    r += 2;
+    break;
+  case 'T':
+  case 'U':
+    r += 3;
+    break;
+  case '-':
+  case 'N':
+    s += 1;
+    break;
+  default:
+    throw BadCharException(string(1, lj), "RNY::getRNY(int,int;int,alph): Specified base unknown.");
+  }
+
+  r *= 3;
+  s *= 2;
+
+  switch (lk)
+  {
+  case 'A':
+    break; // rank 0
+  case 'G':
+    r += 1;
+    break;
+  case 'C':
+  case 'T':
+  case 'U':
+    r += 2;
+    break;
+  case '-':
+  case 'N':
+    s += 1;
+    break;
+  default:
+    throw BadCharException(string(1, lk), "RNY::getRNY(int,int;int,alph): Specified base unknown.");
+  }
+
+  return 50 * s + r;
+}
+
+/****************************************************************************************/
+/**
+ * @param state The state number to test.
+ * @return true only for state number 350 (labelled "---" in intToChar()).
+ */
+bool RNY::isGap(int state) const
+{
+  const int fullGapState = 350;
+  return fullGapState == state;
+}
+
+/**
+ * @param state The state to test, as text.
+ * @return true if the text contains at least one '-' character.
+ */
+bool RNY::containsGap(const string& state) const throw (BadCharException)
+{
+  const string::size_type firstDash = state.find('-');
+  return firstDash != string::npos;
+}
+
+/**
+ * A textual state is reported as unresolved exactly when containsGap()
+ * reports true for it.
+ *
+ * @param state The state to test, as text.
+ * @return The result of containsGap(state).
+ */
+bool RNY::isUnresolved(const string& state) const
+{
+ return containsGap(state);
+}
+
+/**
+ * @param state The state number to test.
+ * @return true for every state number from 50 upwards except 350, the
+ *         number isGap() tests for.
+ */
+bool RNY::isUnresolved(int state) const
+{
+  return !(state < 50 || state == 350);
+}
+
+/****************************************************************************************/
+
+/**
+ * Convert a textual state to its number.
+ *
+ * Only words of exactly three characters are forwarded to
+ * AbstractAlphabet::charToInt(); any other length is rejected here.
+ *
+ * @param state The state, as text.
+ * @return The value returned by AbstractAlphabet::charToInt(state).
+ * @throw BadCharException if state.size() != 3 (the base-class lookup may
+ *        presumably throw as well -- confirm against AbstractAlphabet).
+ */
+int RNY::charToInt(const string& state) const throw (BadCharException)
+{
+  if (state.size() == 3)
+    return AbstractAlphabet::charToInt(state);
+
+  throw BadCharException(state, "RNY::charToInt", this);
+}
+
+
+/************************************************************/
+
+/**
+ * Convert a state number to its letters.
+ *
+ * The function generates, group by group, the state numbers listed in the
+ * comments below, fetches each one with getState() and returns its letter as
+ * soon as its number equals the requested one.
+ *
+ * @param state The state number to convert.
+ * @return The letter of the first probed state whose number equals state.
+ * @throw BadIntException if no probed state matches.
+ */
+string RNY::intToChar(int state) const throw (BadIntException)
+{
+ int i, j, k, l;
+ // NNN (0->35): l = 12 * i + 3 * j + k
+ for (i = 0; i < 3; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ l = i * 12 + j * 3 + k;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+ }
+ }
+
+ // NN- (50->83, in steps of 3)
+
+ for (i = 0; i < 3; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ l = 50 + 12 * i + j * 3;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+ }
+
+ // N-N (100->126): l = 100 + 12 * i + k
+
+ for (i = 0; i < 3; ++i)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ l = 100 + 12 * i + k;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+ }
+
+ // N-- (150, 162, 174)
+
+ for (i = 0; i < 3; ++i)
+ {
+ l = 150 + 12 * i;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+
+ // -NN (200->211)
+
+ for (j = 0; j < 4; ++j)
+ {
+ for (k = 0; k < 3; ++k)
+ {
+ l = 200 + j * 3 + k;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+ }
+
+
+ // -N- (250, 253, 256, 259)
+
+ for (j = 0; j < 4; ++j)
+ {
+ l = 250 + 3 * j;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+
+ // --N (300->302)
+
+ for (k = 0; k < 3; ++k)
+ {
+ l = 300 + k;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+ }
+
+
+ // --- (350)
+
+ l = 350;
+ if (getState(l).getNum() == state)
+ return getState(l).getLetter();
+
+ throw BadIntException(state, "RNY::intToChar: Specified base unknown", this);
+ // Never reached: the throw above always leaves the function.
+ return "XXX";
+}
diff --git a/src/Bpp/Seq/Alphabet/RNY.h b/src/Bpp/Seq/Alphabet/RNY.h
index 8fd713c..63c30a2 100644
--- a/src/Bpp/Seq/Alphabet/RNY.h
+++ b/src/Bpp/Seq/Alphabet/RNY.h
@@ -69,6 +69,21 @@ private:
public:
RNY(const NucleicAlphabet&);
+
+ /**
+ * @brief Copy constructor: copy-constructs the AbstractAlphabet base from
+ * bia and initializes nuclalph_ from bia.nuclalph_.
+ */
+ RNY(const RNY& bia) : AbstractAlphabet(bia), nuclalph_(bia.nuclalph_) {}
+
+ /**
+ * @brief Assignment operator: assigns the AbstractAlphabet part only.
+ *
+ * NOTE(review): nuclalph_ is deliberately left untouched (the assignment is
+ * commented out below); presumably it is a reference member that cannot be
+ * reseated -- confirm against the member declaration.
+ *
+ * @return A reference to this object.
+ */
+ RNY& operator=(const RNY& bia)
+ {
+ AbstractAlphabet::operator=(bia);
+// nuclalph_=bia.nuclalph_;
+ return *this;
+ }
+
+ /**
+ * @return A new RNY object allocated with new and copy-constructed from
+ * this one (presumably owned by the caller -- confirm).
+ */
+ RNY* clone() const
+ {
+ return new RNY(*this);
+ }
+
~RNY() {}
public:
diff --git a/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp
deleted file mode 100644
index 42f6617..0000000
--- a/src/Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// File: StandardCodonAlphabet.h
-// Authors: Julien Dutheil
-// Sylvain Gaillard
-// Created on: Sun Oct 12 17:51:36 2003
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#include "StandardCodonAlphabet.h"
-
-using namespace bpp;
-using namespace std;
-
-StandardCodonAlphabet::StandardCodonAlphabet(const NucleicAlphabet* alpha) :
- CodonAlphabet(alpha)
-{
- string A = alpha->intToChar(0);
- string G = alpha->intToChar(2);
- string T = alpha->intToChar(3);
-
- vector<string> vstop;
-
- vstop.push_back(T + A + A);
- vstop.push_back(T + A + G);
- vstop.push_back(T + G + A);
-
- int istop;
- unsigned int j;
- for (unsigned int i = 0 ; i < vstop.size() ; i++) {
- istop = charToInt(vstop[i]);
- stopCodons_.push_back(istop);
- j = 0;
- while (j < getNumberOfChars()) {
- if (getStateAt(j).getNum() == istop) {
- getStateAt(j).setName(STOP);
- break;
- }
- j++;
- }
- }
- initCodon_ = charToInt(A + T + G);
-}
-
diff --git a/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp
deleted file mode 100644
index a261c92..0000000
--- a/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//
-// File: VertebrateMitochondrialCodonAlphabet.h
-// Authors: Eric Bazin
-// Sylvain Gaillard
-// Created on: thu mar 1 14:25:09 CET 2005
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#include "VertebrateMitochondrialCodonAlphabet.h"
-
-using namespace bpp;
-using namespace std;
-
-VertebrateMitochondrialCodonAlphabet::VertebrateMitochondrialCodonAlphabet(const NucleicAlphabet* alpha) : CodonAlphabet(alpha)
-{
- string A = alpha->intToChar(0);
- string G = alpha->intToChar(2);
- string T = alpha->intToChar(3);
-
- vector<string> vstop;
-
- vstop.push_back(T + A + A);
- vstop.push_back(T + A + G);
- vstop.push_back(A + G + G);
- vstop.push_back(A + G + A);
-
- int istop;
- unsigned int j;
- for (unsigned int i = 0 ; i < vstop.size() ; i++) {
- istop = charToInt(vstop[i]);
- stopCodons_.push_back(istop);
-
- j = 0;
- while (j < getNumberOfChars()) {
- if (getStateAt(j).getNum() == istop) {
- getStateAt(j).setName(STOP);
- break;
- }
- j++;
- }
- }
- initCodon_ = charToInt(A + T + G);
-}
-
diff --git a/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h b/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h
deleted file mode 100644
index e241c5d..0000000
--- a/src/Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//
-// File: VertebrateMitochondrialCodonAlphabet.h
-// Created by: Eric Bazin
-// Created on: wen mar 2 16:01:59 CET 2005
-///
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#ifndef _VERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-#define _VERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-
-#include "CodonAlphabet.h"
-
-namespace bpp
-{
-
-/**
- * @brief This class implements the vertebrate mitochondrial codon alphabet as describe on the NCBI
- * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG2
- * @author Laurent Guéguen
- */
-
-class VertebrateMitochondrialCodonAlphabet : public CodonAlphabet
-{
-public:
- VertebrateMitochondrialCodonAlphabet(const NucleicAlphabet * alpha);
- virtual ~VertebrateMitochondrialCodonAlphabet() {};
-
-public:
-
- std::string getAlphabetType() const
- {
- return "Codon alphabet: VertebrateMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")";
- }
-};
-
-} //end of namespace bpp.
-
-#endif //_VERTEBRATEMITOCHONDRIALCODONALPHABET_H_
-
diff --git a/src/Bpp/Seq/Alphabet/WordAlphabet.cpp b/src/Bpp/Seq/Alphabet/WordAlphabet.cpp
index cb78f32..2008f7f 100644
--- a/src/Bpp/Seq/Alphabet/WordAlphabet.cpp
+++ b/src/Bpp/Seq/Alphabet/WordAlphabet.cpp
@@ -6,7 +6,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -69,59 +69,65 @@ WordAlphabet::WordAlphabet(const Alphabet* pAlpha, unsigned int num) :
void WordAlphabet::build_()
{
- unsigned int size = 1;
+ size_t size = 1;
- for (unsigned int i = 0; i < vAbsAlph_.size(); i++)
+ for (size_t i = 0; i < vAbsAlph_.size(); ++i)
{
size *= vAbsAlph_[i]->getSize();
}
- resize(size + 2);
+ vector<AlphabetState*> states(size + 2);
string s = "";
- for (unsigned int i = 0; i < vAbsAlph_.size(); i++)
+ for (size_t i = 0; i < vAbsAlph_.size(); ++i)
{
s += "-";
}
- setState(0, AlphabetState(-1, s, "gap"));
+ states[0] = new AlphabetState(-1, s, "gap");
- for (unsigned int i = 0; i < size; i++)
+ for (size_t i = 0; i < size; ++i)
{
- setState(i + 1, AlphabetState(i, "", ""));
+ states[i + 1] = new AlphabetState(static_cast<int>(i), "", "");
}
- unsigned lr = size;
+ size_t lr = size;
char c;
- for (unsigned int na = 0; na < vAbsAlph_.size(); na++)
+ for (size_t na = 0; na < vAbsAlph_.size(); ++na)
{
lr /= vAbsAlph_[na]->getSize();
- unsigned int j = 1;
- unsigned int i = 0;
+ size_t j = 1;
+ int i = 0;
while (j <= size)
{
c = vAbsAlph_[na]->intToChar(i)[0];
- for (unsigned int k = 0; k < lr; k++)
+ for (size_t k = 0; k < lr; k++)
{
- getStateAt(j).setLetter(getStateAt(j).getLetter() + c);
+ states[j]->setLetter(states[j]->getLetter() + c);
j++;
// alphabet[j++].letter += c;
}
- if (++i == vAbsAlph_[na]->getSize())
+ if (++i == static_cast<int>(vAbsAlph_[na]->getSize()))
i = 0;
}
}
s = "";
- for (unsigned i = 0; i < vAbsAlph_.size(); i++)
+ for (size_t i = 0; i < vAbsAlph_.size(); ++i)
{
s += "N";
}
- setState(size + 1, AlphabetState(size, s, "Unresolved"));
- remap();
+ states[size + 1] = new AlphabetState(static_cast<int>(size), s, "Unresolved");
+
+ //Now register all states once for all:
+ for (size_t i = 0; i < states.size(); ++i) {
+ registerState(states[i]);
+ }
+ //jdutheil on 24/07/14: this should not be necessary anymore.
+ //remap();
}
/******************************************************************************/
@@ -202,14 +208,14 @@ std::vector<int> WordAlphabet::getAlias(int state) const throw (BadIntException)
if (!isIntInAlphabet(state))
throw BadIntException(state, "WordAlphabet::getAlias(int): Specified base unknown.");
vector<int> v;
- int i, s = getSize();
+ size_t s = getSize();
- if (state == s)
+ if (static_cast<size_t>(state) == s)
{
v.resize(s);
- for (i = 0; i < s; i++)
+ for (size_t i = 0; i < s; ++i)
{
- v[i] = i;
+ v[i] = static_cast<int>(i);
}
}
else
@@ -228,10 +234,10 @@ std::vector<std::string> WordAlphabet::getAlias(const std::string& state) const
throw BadCharException(locstate, "WordAlphabet::getAlias(string): Specified base unknown.");
vector<string> v;
- unsigned int i, s = getSize();
+ size_t s = getSize();
string st = "";
- for (i = 0; i < vAbsAlph_.size(); i++)
+ for (size_t i = 0; i < vAbsAlph_.size(); ++i)
{
st += "N";
}
@@ -239,9 +245,9 @@ std::vector<std::string> WordAlphabet::getAlias(const std::string& state) const
if (locstate == st)
{
v.resize(s);
- for (i = 0; i < s; i++)
+ for (size_t i = 0; i < s; ++i)
{
- v[i] = intToChar(i);
+ v[i] = intToChar(static_cast<int>(i));
}
}
else
@@ -313,7 +319,7 @@ Sequence* WordAlphabet::translate(const Sequence& sequence, size_t pos) const th
unsigned int l = getLength();
size_t i = pos;
- while (i + l < s)
+ while (i + l <= s)
{
v2.push_back(getWord(v1, i));
i += l;
diff --git a/src/Bpp/Seq/Alphabet/WordAlphabet.h b/src/Bpp/Seq/Alphabet/WordAlphabet.h
index 5bec05c..e03d219 100644
--- a/src/Bpp/Seq/Alphabet/WordAlphabet.h
+++ b/src/Bpp/Seq/Alphabet/WordAlphabet.h
@@ -90,6 +90,20 @@ public:
*/
WordAlphabet(const Alphabet* pAlpha, unsigned int num);
+ /**
+ * @brief Copy constructor: copy-constructs the AbstractAlphabet base from
+ * bia and copies its vAbsAlph_ container.
+ */
+ WordAlphabet(const WordAlphabet& bia) : AbstractAlphabet(bia), vAbsAlph_(bia.vAbsAlph_) {}
+
+ /**
+ * @brief Assignment operator: assigns the AbstractAlphabet part, then
+ * copies vAbsAlph_ from bia.
+ *
+ * NOTE(review): vAbsAlph_ appears to hold pointers to alphabets, so this
+ * copies the pointers rather than the alphabets they point to -- confirm.
+ *
+ * @return A reference to this object.
+ */
+ WordAlphabet& operator=(const WordAlphabet& bia)
+ {
+ AbstractAlphabet::operator=(bia);
+ vAbsAlph_=bia.vAbsAlph_;
+ return *this;
+ }
+
+ /**
+ * @return A new WordAlphabet object allocated with new and copy-constructed
+ * from this one (presumably owned by the caller -- confirm).
+ */
+ WordAlphabet* clone() const
+ {
+ return new WordAlphabet(*this);
+ }
+
virtual ~WordAlphabet() {}
public:
@@ -115,7 +129,7 @@ public:
if (state.size() != vAbsAlph_.size())
throw BadCharException(state, "WordAlphabet::charToInt", this);
if (containsUnresolved(state))
- return getSize();
+ return static_cast<int>(getSize());
if (containsGap(state))
return -1;
else return AbstractAlphabet::charToInt(state);
@@ -155,9 +169,10 @@ public:
}
std::string getAlphabetType() const;
+
int getUnknownCharacterCode() const
{
- return getSize();
+ return static_cast<int>(getSize());
}
bool isUnresolved(int state) const { return state == getUnknownCharacterCode(); }
@@ -264,7 +279,7 @@ public:
* @param n The position in the word (starting at 0).
* @return The char description of the n-position of the word.
*/
- std::string getNPosition (const std::string& word, size_t n) const throw (BadCharException)
+ std::string getNPosition(const std::string& word, size_t n) const throw (BadCharException)
{
if (n > vAbsAlph_.size())
throw BadCharException("", "WordAlphabet::getNPosition", this);
diff --git a/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp b/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp
deleted file mode 100644
index e4c26a9..0000000
--- a/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-//
-// File: YeastbrateMitochondrialCodonAlphabet.cpp
-// Created by: Benoit Nabholz
-// Created on: Sun Oct 10 14:33 CET 2010
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#include "YeastMitochondrialCodonAlphabet.h"
-
-using namespace bpp;
-using namespace std;
-
-YeastMitochondrialCodonAlphabet::YeastMitochondrialCodonAlphabet(const NucleicAlphabet * alpha) : CodonAlphabet(alpha)
-{
- string A = alpha->intToChar(0);
- string G = alpha->intToChar(2);
- string T = alpha->intToChar(3);
-
- vector<string> vstop;
-
- vstop.push_back(T + A + A);
- vstop.push_back(T + A + G);
-
- int istop;
- unsigned int j;
- for (unsigned int i = 0 ; i < vstop.size() ; i++) {
- istop = charToInt(vstop[i]);
- stopCodons_.push_back(istop);
-
- j = 0;
- while (j < getNumberOfChars()) {
- if (getStateAt(j).getNum() == istop) {
- getStateAt(j).setName(STOP);
- break;
- }
- j++;
- }
- }
-}
-
diff --git a/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h b/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h
deleted file mode 100644
index ce67912..0000000
--- a/src/Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//
-// File: YeastbrateMitochondrialCodonAlphabet.h
-// Created by: Benoit Nabholz
-// Created on: Sun Oct 10 14:33 CET 2010
-//
-
-/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
-*/
-
-#ifndef _YEASTMITOCHONDRIALCODONALPHABET_H_
-#define _YEASTMITOCHONDRIALCODONALPHABET_H_
-
-
-#include "CodonAlphabet.h"
-
-namespace bpp
-{
-
-/**
- * @brief This class implements the Yeast
- * @author Laurent Guéguen
- *
- * Mitochondrial codon alphabet as describe on the NCBI website:
- * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG3
- */
-class YeastMitochondrialCodonAlphabet:
- public CodonAlphabet
-{
-public:
- YeastMitochondrialCodonAlphabet(const NucleicAlphabet * alpha);
- virtual ~YeastMitochondrialCodonAlphabet() {};
-
-public:
-
- std::string getAlphabetType() const
- {
- return "Codon alphabet: YeastMitochondrial(" + vAbsAlph_[0]->getAlphabetType() + ")";
- }
-
-};
-
-} //end of namespace bpp.
-
-#endif //_YEASTMITOCHONDRIALCODONALPHABET_H_
-
diff --git a/src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h b/src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h
index a388987..c722b9b 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAChargeIndex.h
@@ -133,12 +133,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAChargeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return charge_[state];
+ return charge_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return charge_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return charge_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(charge_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h b/src/Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h
index 2c43615..f83acaf 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAChenGuHuangHydrophobicityIndex.h
@@ -104,12 +104,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAChenGuHuangHydrophobicityIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return hydrophobicity_[state];
+ return hydrophobicity_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return hydrophobicity_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return hydrophobicity_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(hydrophobicity_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h
index dc2ade5..c2ff6e3 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanAHelixIndex.h
@@ -92,12 +92,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanAHelixIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return aHelix_[state];
+ return aHelix_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return aHelix_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return aHelix_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(aHelix_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h
index 7747860..db1040b 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanBSheetIndex.h
@@ -92,12 +92,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanBSheetIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return bSheet_[state];
+ return bSheet_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return bSheet_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return bSheet_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(bSheet_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h
index a0bbbd7..d19022a 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAChouFasmanTurnIndex.h
@@ -92,12 +92,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAChouFasmanTurnIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return turn_[state];
+ return turn_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return turn_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return turn_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(turn_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp b/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp
index 07a7725..d6edd0e 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
diff --git a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h b/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h
index 1e12f00..4500772 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -84,12 +84,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "KleinAANetChargeIndex::getIndex(). Invalid state.", alpha_);
- return property_[state];
+ return property_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return property_[alpha_->charToInt(state)];
+ return property_[static_cast<size_t>(alpha_->charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(property_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp b/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp
index 5daf635..fa4a554 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -49,7 +49,8 @@ using namespace std;
AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException) :
property_(20, 20),
- alpha_(&AlphabetTools::PROTEIN_ALPHABET)
+ alpha_(&AlphabetTools::PROTEIN_ALPHABET),
+ sym_(sym)
{
// Parse entry:
string line;
@@ -60,7 +61,7 @@ AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException)
line = FileTools::getNextLine(input);
if (line[0] == 'M')
{
- for (unsigned int i = 0; i < 20; i++)
+ for (size_t i = 0; i < 20; ++i)
{
line = FileTools::getNextLine(input);
StringTokenizer st1(line, " ");
@@ -74,7 +75,7 @@ AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException)
{
if (st1.numberOfRemainingTokens() != i + 1)
break;
- for (unsigned int j = 0; j <= i; j++)
+ for (size_t j = 0; j <= i; ++j)
{
property_(i, j) = TextTools::toDouble(st1.nextToken());
}
@@ -83,7 +84,7 @@ AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException)
{
if (st1.numberOfRemainingTokens() != 20)
break;
- for (unsigned int j = 0; j < 20; j++)
+ for (size_t j = 0; j < 20; ++j)
{
property_(i, j) = TextTools::toDouble(st1.nextToken());
}
@@ -97,5 +98,7 @@ AAIndex2Entry::AAIndex2Entry(std::istream& input, bool sym) throw (IOException)
while (!ok);
if (!ok)
throw IOException("AAIndex2Entry: invalid AAIndex2 entry.");
+ if (!diag)
+ sym_ = false;
}
diff --git a/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h b/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h
index 2398e0b..cad39e3 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAIndex2Entry.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -55,6 +55,7 @@ class AAIndex2Entry :
private:
LinearMatrix<double> property_;
const ProteicAlphabet* alpha_;
+ bool sym_;
public:
/**
@@ -65,19 +66,22 @@ public:
* This option as an effect only if the matrix is specified as a triangle in the entry.
* If sym==true, the oher triangle will be built by symmetry.
* If sym==false, the other triangle will be set to (-) the given triangle.
+ * If the input matrix is square, it will be considered non-symmetric.
* @throw IOException if the stream content does not follow the AAIndex2 database entry format.
*/
AAIndex2Entry(std::istream& input, bool sym = true) throw (IOException);
AAIndex2Entry(const AAIndex2Entry& index) :
property_(index.property_),
- alpha_(index.alpha_)
+ alpha_(index.alpha_),
+ sym_(index.sym_)
{}
AAIndex2Entry& operator=(const AAIndex2Entry& index)
{
property_ = index.property_;
alpha_ = index.alpha_;
+ sym_ = index.sym_;
return *this;
}
@@ -90,17 +94,22 @@ public:
double getIndex(int state1, int state2) const throw (BadIntException)
{
- if (state1 < 0 || state1 > 19) throw BadIntException(state1, "AAIndex2Entry::getIndex(). Invalid state1.", alpha_);
- if (state2 < 0 || state2 > 19) throw BadIntException(state2, "AAIndex2Entry::getIndex(). Invalid state2.", alpha_);
- return property_(state1, state2);
+ size_t stateIndex1 = alpha_->getStateIndex(state1);
+ size_t stateIndex2 = alpha_->getStateIndex(state2);
+ return property_(stateIndex1, stateIndex2);
}
double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException)
{
- return property_(alpha_->charToInt(state1), alpha_->charToInt(state2));
+ size_t stateIndex1 = alpha_->getStateIndex(state1);
+ size_t stateIndex2 = alpha_->getStateIndex(state2);
+ return property_(stateIndex1, stateIndex2);
}
LinearMatrix<double>* getIndexMatrix() const { return new LinearMatrix<double>(property_); }
+
+ bool isSymmetric() const { return sym_; }
+
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/AlphabetIndex/AAMassIndex.h b/src/Bpp/Seq/AlphabetIndex/AAMassIndex.h
index e71c634..4eb4618 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAMassIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAMassIndex.h
@@ -48,8 +48,6 @@ namespace bpp
{
/**
* @brief Mass (dalton) of each amino acid, according to http://www.imb-jena.de/IMAGE_AA.html.
- *
- *
*/
class AAMassIndex :
public AlphabetIndex1
@@ -92,12 +90,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAMassIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return mass_[state];
+ return mass_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return mass_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return mass_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(mass_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h b/src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h
index 5a295a1..ec52747 100644
--- a/src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h
+++ b/src/Bpp/Seq/AlphabetIndex/AASEA1030Index.h
@@ -48,8 +48,6 @@ namespace bpp
{
/**
* @brief Percentage of amino acids having a Solvent Exposed Area between 10 and 30 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm.
- *
- *
*/
class AASEA1030Index :
public AlphabetIndex1
@@ -92,12 +90,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AASEA1030Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return sea1030_[state];
+ return sea1030_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return sea1030_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return sea1030_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(sea1030_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h b/src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h
index 915f975..8cdb3d1 100644
--- a/src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h
+++ b/src/Bpp/Seq/AlphabetIndex/AASEAInf10Index.h
@@ -48,8 +48,6 @@ namespace bpp
{
/**
* @brief Percentage of amino acids having a Solvent Exposed Area below 10 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm.
- *
- *
*/
class AASEAInf10Index :
public AlphabetIndex1
@@ -92,12 +90,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AASEAInf10Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return seaInf10_[state];
+ return seaInf10_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return seaInf10_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return seaInf10_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(seaInf10_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h b/src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h
index cfc6ad5..524ebaa 100644
--- a/src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h
+++ b/src/Bpp/Seq/AlphabetIndex/AASEASup30Index.h
@@ -48,8 +48,6 @@ namespace bpp
{
/**
* @brief Percentage of amino acids having a Solvent Exposed Area above 30 Angström^2 for each type of amino acid, according to http://prowl.rockefeller.edu/aainfo/access.htm
- *
- *
*/
class AASEASup30Index :
public AlphabetIndex1
@@ -92,12 +90,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AASEASup30Index::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return seaSup30_[state];
+ return seaSup30_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return seaSup30_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return seaSup30_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(seaSup30_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h b/src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h
index a259915..f62e1dd 100644
--- a/src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AASurfaceIndex.h
@@ -48,8 +48,6 @@ namespace bpp
{
/**
* @brief Surface (Angström^2) of each amino acid, according to http://www.imb-jena.de/IMAGE_AA.html
- *
- *
*/
class AASurfaceIndex :
public AlphabetIndex1
@@ -92,12 +90,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AASurfaceIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return surface_[state];
+ return surface_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return surface_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return surface_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(surface_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h b/src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h
index 4cedd45..c10bd55 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/AAVolumeIndex.h
@@ -92,12 +92,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "AAVolumeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return volume_[state];
+ return volume_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return volume_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return volume_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(volume_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h b/src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h
index 7578ff1..8eb2602 100644
--- a/src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h
+++ b/src/Bpp/Seq/AlphabetIndex/AlphabetIndex1.h
@@ -63,6 +63,8 @@ public:
virtual ~AlphabetIndex1() {}
public:
+ virtual AlphabetIndex1* clone() const = 0;
+
/**
* @brief Get the index associated to a state.
*
diff --git a/src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h b/src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h
index bf7b29d..de34679 100644
--- a/src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h
+++ b/src/Bpp/Seq/AlphabetIndex/AlphabetIndex2.h
@@ -61,12 +61,14 @@ public:
virtual ~AlphabetIndex2() {}
public:
+ virtual AlphabetIndex2* clone() const = 0;
+
/**
* @brief Get the index associated to a pair of states.
*
* @param state1 First state to consider, as a int value.
* @param state2 Second state to consider, as a int value.
- * @return The index associated to the pair of states
+ * @return The index associated to the pair of states.
*/
virtual double getIndex(int state1, int state2) const = 0;
@@ -75,7 +77,7 @@ public:
*
* @param state1 First state to consider, as a string value.
* @param state2 Second state to consider, as a string value.
- * @return The index associated to the pair of states
+ * @return The index associated to the pair of states.
*/
virtual double getIndex(const std::string& state1, const std::string& state2) const = 0;
@@ -90,6 +92,11 @@ public:
* @return A matrix object with all indices.
*/
virtual Matrix<double>* getIndexMatrix() const = 0;
+
+ /**
+ * @return True if the index is symmetric (that is, index(i,j) == index(j, i)).
+ */
+ virtual bool isSymmetric() const = 0;
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp b/src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp
index 146e328..1094a5c 100644
--- a/src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/BLOSUM50.cpp
@@ -6,7 +6,7 @@
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -58,17 +58,17 @@ BLOSUM50::BLOSUM50() :
double BLOSUM50::getIndex(int state1, int state2) const
throw (BadIntException)
{
- if (state1 < 0 || state1 > 19)
- throw BadIntException(state1, "BLOSUM50::getIndex(). Invalid state1.", alpha_);
- if (state2 < 0 || state2 > 19)
- throw BadIntException(state2, "BLOSUM50::getIndex(). Invalid state2.", alpha_);
- return distanceMatrix_(state1, state2);
+ size_t stateIndex1 = alpha_->getStateIndex(state1);
+ size_t stateIndex2 = alpha_->getStateIndex(state2);
+ return distanceMatrix_(stateIndex1, stateIndex2);
}
double BLOSUM50::getIndex(const std::string& state1, const std::string& state2) const
throw (BadCharException)
{
- return distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2));
+ return distanceMatrix_(
+ static_cast<size_t>(alpha_->charToInt(state1)),
+ static_cast<size_t>(alpha_->charToInt(state2)));
}
LinearMatrix<double>* BLOSUM50::getIndexMatrix() const
diff --git a/src/Bpp/Seq/AlphabetIndex/BLOSUM50.h b/src/Bpp/Seq/AlphabetIndex/BLOSUM50.h
index ea0b5fc..ccded85 100644
--- a/src/Bpp/Seq/AlphabetIndex/BLOSUM50.h
+++ b/src/Bpp/Seq/AlphabetIndex/BLOSUM50.h
@@ -96,6 +96,7 @@ public:
const Alphabet* getAlphabet() const { return alpha_; }
BLOSUM50* clone() const { return new BLOSUM50(); }
LinearMatrix<double>* getIndexMatrix() const;
+ bool isSymmetric() const { return true; }
/** @} */
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp b/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp
index 0265983..7eba9d2 100644
--- a/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -79,7 +79,9 @@ throw (BadIntException)
if (alpha_->isGap(state2) || !alpha_->isIntInAlphabet(state2))
throw BadIntException(state2, "DefaultNucleotideScore::getIndex(). Invalid state1.", alpha_);
if (!alpha_->isUnresolved(state1) && !alpha_->isUnresolved(state2))
- return distanceMatrix_(state1, state2);
+ return distanceMatrix_(
+ static_cast<size_t>(state1),
+ static_cast<size_t>(state2));
vector<int> states1 = alpha_->getAlias(state1);
vector<int> states2 = alpha_->getAlias(state2);
double score = -5;
@@ -99,7 +101,9 @@ throw (BadIntException)
double DefaultNucleotideScore::getIndex(const std::string& state1, const std::string& state2) const
throw (BadCharException)
{
- return distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2));
+ return distanceMatrix_(
+ static_cast<size_t>(alpha_->charToInt(state1)),
+ static_cast<size_t>(alpha_->charToInt(state2)));
}
LinearMatrix<double>* DefaultNucleotideScore::getIndexMatrix() const
diff --git a/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h b/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h
index b431bbf..30609cb 100644
--- a/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h
+++ b/src/Bpp/Seq/AlphabetIndex/DefaultNucleotideScore.h
@@ -100,6 +100,7 @@ public:
const Alphabet* getAlphabet() const { return alpha_; }
DefaultNucleotideScore* clone() const { return new DefaultNucleotideScore(*this); }
LinearMatrix<double>* getIndexMatrix() const;
+ bool isSymmetric() const { return true; }
/** @} */
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp b/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp
index 702246f..0399981 100644
--- a/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -64,15 +64,13 @@ GranthamAAChemicalDistance::~GranthamAAChemicalDistance() {}
double GranthamAAChemicalDistance::getIndex(int state1, int state2) const
throw (BadIntException)
{
- if (state1 < 0 || state1 > 19)
- throw BadIntException(state1, "GranthamAAChemicalDistance::getIndex(). Invalid state1.", alpha_);
- if (state2 < 0 || state2 > 19)
- throw BadIntException(state2, "GranthamAAChemicalDistance::getIndex(). Invalid state2.", alpha_);
- double d = distanceMatrix_(state1, state2);
+ size_t stateIndex1 = alpha_->getStateIndex(state1);
+ size_t stateIndex2 = alpha_->getStateIndex(state2);
+ double d = distanceMatrix_(stateIndex1, stateIndex2);
if (sign_ == SIGN_NONE)
return NumTools::abs<double>(d);
if (sign_ == SIGN_PC1)
- return signMatrix_(state1, state2) * NumTools::abs<double>(d);
+ return signMatrix_(stateIndex1, stateIndex2) * NumTools::abs<double>(d);
return d;
}
@@ -87,9 +85,9 @@ Matrix<double>* GranthamAAChemicalDistance::getIndexMatrix() const
RowMatrix<double>* m = new RowMatrix<double>(distanceMatrix_);
if (sign_ == SIGN_NONE)
{
- for (unsigned int i = 0; i < 20; i++)
+ for (size_t i = 0; i < 20; ++i)
{
- for (unsigned int j = 0; j < 20; j++)
+ for (size_t j = 0; j < 20; ++j)
{
(*m)(i, j) = NumTools::abs<double>((*m)(i, j));
}
@@ -97,9 +95,9 @@ Matrix<double>* GranthamAAChemicalDistance::getIndexMatrix() const
}
else if (sign_ == SIGN_PC1)
{
- for (unsigned int i = 0; i < 20; i++)
+ for (size_t i = 0; i < 20; ++i)
{
- for (unsigned int j = 0; j < 20; j++)
+ for (size_t j = 0; j < 20; ++j)
{
(*m)(i, j) = signMatrix_(i, j) * NumTools::abs<double>((*m)(i, j));
}
diff --git a/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h b/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h
index aaa9d51..19037d5 100644
--- a/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h
+++ b/src/Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
diff --git a/src/Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h b/src/Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h
index 7dcc1a4..cf66348 100644
--- a/src/Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -105,12 +105,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "GranthamAAPolarityIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return polarity_[state];
+ return polarity_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return polarity_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return polarity_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(polarity_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h b/src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h
index 75ad954..7d3d790 100644
--- a/src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -108,12 +108,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "GranthamAAVolumeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return volume_[state];
+ return volume_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return volume_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return volume_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(volume_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h b/src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h
index 76c3d2b..3d834ea 100644
--- a/src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h
+++ b/src/Bpp/Seq/AlphabetIndex/KleinAANetChargeIndex.h
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -106,12 +106,12 @@ public:
double getIndex(int state) const throw (BadIntException)
{
if (state < 0 || state > 19) throw BadIntException(state, "KleinAANetChargeIndex::getIndex(). Invalid state.", &AlphabetTools::PROTEIN_ALPHABET);
- return charge_[state];
+ return charge_[static_cast<size_t>(state)];
}
double getIndex(const std::string& state) const throw (BadCharException)
{
- return charge_[AlphabetTools::PROTEIN_ALPHABET.charToInt(state)];
+ return charge_[static_cast<size_t>(AlphabetTools::PROTEIN_ALPHABET.charToInt(state))];
}
std::vector<double>* getIndexVector() const { return new std::vector<double>(charge_); }
diff --git a/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp b/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp
index 9d56acd..7b23c60 100644
--- a/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -59,18 +59,18 @@ MiyataAAChemicalDistance::MiyataAAChemicalDistance() :
double MiyataAAChemicalDistance::getIndex(int state1, int state2) const
throw (BadIntException)
{
- if (state1 < 0 || state1 > 19)
- throw BadIntException(state1, "MiyataAAChemicalDistance::getIndex(). Invalid state1.", alpha_);
- if (state2 < 0 || state2 > 19)
- throw BadIntException(state2, "MiyataAAChemicalDistance::getIndex(). Invalid state2.", alpha_);
- double d = distanceMatrix_(state1, state2);
+ size_t stateIndex1 = alpha_->getStateIndex(state1);
+ size_t stateIndex2 = alpha_->getStateIndex(state2);
+ double d = distanceMatrix_(stateIndex1, stateIndex2);
return sym_ ? NumTools::abs<double>(d) : d;
}
double MiyataAAChemicalDistance::getIndex(const string& state1, const string& state2) const
throw (BadCharException)
{
- double d = distanceMatrix_(alpha_->charToInt(state1), alpha_->charToInt(state2));
+ double d = distanceMatrix_(
+ static_cast<size_t>(alpha_->charToInt(state1)),
+ static_cast<size_t>(alpha_->charToInt(state2)));
return sym_ ? NumTools::abs(d) : d;
}
diff --git a/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h b/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h
index 3bfaede..75974f3 100644
--- a/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h
+++ b/src/Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.h
@@ -1,13 +1,11 @@
//
// File: MiyataAAChemicalDistance.h
-// Created by: jdutheil <Julien.Dutheil at univ-montp2.fr>
+// Created by: Julien Dutheil
// Created on: Mon Feb 21 17:42 2005
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
-
- Julien.Dutheil at univ-montp2.fr
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
diff --git a/src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h b/src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h
index 35a4247..71818cb 100644
--- a/src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h
+++ b/src/Bpp/Seq/AlphabetIndex/SimpleIndexDistance.h
@@ -100,17 +100,17 @@ public:
const Alphabet* getAlphabet() const { return index_->getAlphabet(); }
- Clonable* clone() const { return new SimpleIndexDistance(*this); }
+ SimpleIndexDistance* clone() const { return new SimpleIndexDistance(*this); }
Matrix<double>* getIndexMatrix() const
{
size_t n = index_->getAlphabet()->getSize(); //We should change to "supported ints" there...
RowMatrix<double>* m = new RowMatrix<double>(n, n);
- for (int i = 0; i < static_cast<int>(n); i++)
+ for (size_t i = 0; i < n; ++i)
{
- for (int j = 0; j < static_cast<int>(n); j++)
+ for (size_t j = 0; j < n; ++j)
{
- (*m)(i, j) = getIndex(i, j);
+ (*m)(i, j) = getIndex(static_cast<int>(i), static_cast<int>(j));
}
}
return m;
diff --git a/src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp b/src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp
index 00cf000..5c7801f 100644
--- a/src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp
+++ b/src/Bpp/Seq/AlphabetIndex/SimpleScore.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -51,10 +51,10 @@ SimpleScore::SimpleScore(const Alphabet* alphabet, double match, double mismatch
alphabet_(alphabet)
{
// Load the matrix:
- unsigned int n = alphabet_->getSize();
- for (unsigned int i = 0; i < n; i++)
+ size_t n = alphabet_->getSize();
+ for (size_t i = 0; i < n; ++i)
{
- for (unsigned int j = 0; j < n; j++)
+ for (size_t j = 0; j < n; ++j)
{
distanceMatrix_(i, j) = (i == j ? match : mismatch);
}
@@ -64,17 +64,17 @@ SimpleScore::SimpleScore(const Alphabet* alphabet, double match, double mismatch
double SimpleScore::getIndex(int state1, int state2) const
throw (BadIntException)
{
- if (state1 < 0 || state1 > (int)alphabet_->getSize())
- throw BadIntException(state1, "SimpleScore::getIndex(). Invalid state1.", alphabet_);
- if (state2 < 0 || state2 > (int)alphabet_->getSize())
- throw BadIntException(state2, "SimpleScore::getIndex(). Invalid state2.", alphabet_);
- return distanceMatrix_(state1, state2);
+ size_t stateIndex1 = alphabet_->getStateIndex(state1);
+ size_t stateIndex2 = alphabet_->getStateIndex(state2);
+ return distanceMatrix_(stateIndex1, stateIndex2);
}
double SimpleScore::getIndex(const std::string& state1, const std::string& state2) const
throw (BadCharException)
{
- return distanceMatrix_(alphabet_->charToInt(state1), alphabet_->charToInt(state2));
+ size_t stateIndex1 = alphabet_->getStateIndex(state1);
+ size_t stateIndex2 = alphabet_->getStateIndex(state2);
+ return distanceMatrix_(stateIndex1, stateIndex2);
}
LinearMatrix<double>* SimpleScore::getIndexMatrix() const
diff --git a/src/Bpp/Seq/AlphabetIndex/SimpleScore.h b/src/Bpp/Seq/AlphabetIndex/SimpleScore.h
index 3dbc5f0..4590c39 100644
--- a/src/Bpp/Seq/AlphabetIndex/SimpleScore.h
+++ b/src/Bpp/Seq/AlphabetIndex/SimpleScore.h
@@ -97,6 +97,7 @@ public:
double getIndex(const std::string& state1, const std::string& state2) const throw (BadCharException);
const Alphabet* getAlphabet() const { return alphabet_; }
LinearMatrix<double>* getIndexMatrix() const;
+ bool isSymmetric() const { return true; }
/** @} */
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h b/src/Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h
similarity index 57%
copy from src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h
copy to src/Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h
index 1e12f00..f2e16d9 100644
--- a/src/Bpp/Seq/AlphabetIndex/AAIndex1Entry.h
+++ b/src/Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h
@@ -1,7 +1,7 @@
//
-// File: AAIndex1Entry.h
-// Created by: Julien Dutheil
-// Created on: Fri Jan 19 17:07 2007
+// File: UserAlphabetIndex1.h
+// Created by: Laurent Guéguen
+// Created on: vendredi 29 mars 2013, à 13h 05
//
/*
@@ -37,66 +37,75 @@
knowledge of the CeCILL license and that you accept its terms.
*/
-#ifndef _AAINDEX1ENTRY_H_
-#define _AAINDEX1ENTRY_H_
+#ifndef _USERALPHABETINDEX1_H_
+#define _USERALPHABETINDEX1_H_
#include "AlphabetIndex1.h"
-#include "../Alphabet/ProteicAlphabet.h"
+
+// From the STL:
+#include <vector>
namespace bpp
{
-/**
- * @brief Create a AlphabetIndex1 object from an AAIndex2 entry.
- */
-class AAIndex1Entry :
- public AlphabetIndex1
+ /**
+ * @brief Alphabet index given by user.
+ */
+
+ class UserAlphabetIndex1 :
+ public virtual AlphabetIndex1
{
private:
- std::vector<double> property_;
- const ProteicAlphabet* alpha_;
-
+ const Alphabet* alph_;
+ std::vector<double> index_;
+
public:
- /**
- * @brief Create a new AAIndex1Entry from an input stream.
- *
- * @param input The input stream to use.
- * @throw IOException if the stream content does not follow the AAIndex1 database entry format.
- */
- AAIndex1Entry(std::istream& input) throw (IOException);
+ UserAlphabetIndex1(const Alphabet* alph) :
+ alph_(alph),
+ index_(alph->getSize(),0)
+ {}
- AAIndex1Entry(const AAIndex1Entry& index) :
- property_(index.property_),
- alpha_(index.alpha_)
+ UserAlphabetIndex1(const UserAlphabetIndex1& uAlph) :
+ alph_(uAlph.alph_),//->clone()),
+ index_(uAlph.index_)
{}
- AAIndex1Entry& operator=(const AAIndex1Entry& index)
+ UserAlphabetIndex1& operator=(const UserAlphabetIndex1& uAlph)
{
- property_ = index.property_;
- alpha_ = index.alpha_;
+ alph_ = uAlph.alph_;//->clone();
+ index_ = uAlph.index_;
return *this;
- }
-
- virtual ~AAIndex1Entry() {}
+ }
+
+ virtual ~UserAlphabetIndex1() {}
- AAIndex1Entry* clone() const { return new AAIndex1Entry(*this); }
+ UserAlphabetIndex1* clone() const { return new UserAlphabetIndex1(*this); }
public:
- double getIndex(int state) const throw (BadIntException)
+ double getIndex(int state) const
+ {
+ return index_[alph_->getStateIndex(state)];
+ }
+
+ void setIndex(int state, double val)
+ {
+ index_[alph_->getStateIndex(state)] = val;
+ }
+
+ double getIndex(const std::string& state) const
{
- if (state < 0 || state > 19) throw BadIntException(state, "KleinAANetChargeIndex::getIndex(). Invalid state.", alpha_);
- return property_[state];
+ return index_[alph_->getStateIndex(state)];
}
- double getIndex(const std::string& state) const throw (BadCharException)
+ void setIndex(const std::string& state, double val)
{
- return property_[alpha_->charToInt(state)];
+ index_[alph_->getStateIndex(state)] = val;
}
- std::vector<double>* getIndexVector() const { return new std::vector<double>(property_); }
+ std::vector<double>* getIndexVector() const { return new std::vector<double>(index_); }
- const Alphabet* getAlphabet() const { return alpha_; }
+ const Alphabet* getAlphabet() const { return alph_; }
};
} // end of namespace bpp.
-#endif // _AAINDEX1ENTRY_H_
+#endif // _USERALPHABETINDEX1_H_
diff --git a/src/Bpp/Seq/App/SequenceApplicationTools.cpp b/src/Bpp/Seq/App/SequenceApplicationTools.cpp
index e7502d4..a253854 100644
--- a/src/Bpp/Seq/App/SequenceApplicationTools.cpp
+++ b/src/Bpp/Seq/App/SequenceApplicationTools.cpp
@@ -41,17 +41,15 @@
#include "SequenceApplicationTools.h"
#include "../Alphabet/BinaryAlphabet.h"
#include "../Alphabet/DefaultAlphabet.h"
-#include "../Alphabet/EchinodermMitochondrialCodonAlphabet.h"
-#include "../Alphabet/InvertebrateMitochondrialCodonAlphabet.h"
-#include "../Alphabet/StandardCodonAlphabet.h"
-#include "../Alphabet/VertebrateMitochondrialCodonAlphabet.h"
-#include "../Alphabet/YeastMitochondrialCodonAlphabet.h"
+#include "../Alphabet/CodonAlphabet.h"
#include "../Alphabet/AlphabetTools.h"
#include "../GeneticCode/EchinodermMitochondrialGeneticCode.h"
#include "../GeneticCode/InvertebrateMitochondrialGeneticCode.h"
#include "../GeneticCode/StandardGeneticCode.h"
#include "../GeneticCode/VertebrateMitochondrialGeneticCode.h"
#include "../GeneticCode/YeastMitochondrialGeneticCode.h"
+#include "../GeneticCode/AscidianMitochondrialGeneticCode.h"
+#include "../GeneticCode/MoldMitochondrialGeneticCode.h"
#include "../Io/BppOSequenceReaderFormat.h"
#include "../Io/BppOAlignmentReaderFormat.h"
#include "../Io/BppOSequenceWriterFormat.h"
@@ -64,6 +62,8 @@
#include <Bpp/App/ApplicationTools.h>
#include <Bpp/Text/TextTools.h>
#include <Bpp/Text/KeyvalTools.h>
+#include <Bpp/App/NumCalcApplicationTools.h>
+#include <Bpp/Numeric/Random/RandomTools.h>
using namespace bpp;
using namespace std;
@@ -75,10 +75,11 @@ Alphabet* SequenceApplicationTools::getAlphabet(
const string& suffix,
bool suffixIsOptional,
bool verbose,
- bool allowGeneric) throw (Exception)
+ bool allowGeneric,
+ int warn) throw (Exception)
{
Alphabet* chars;
- string alphtt = ApplicationTools::getStringParameter("alphabet", params, "DNA", suffix, suffixIsOptional);
+ string alphtt = ApplicationTools::getStringParameter("alphabet", params, "DNA", suffix, suffixIsOptional, warn);
string alphabet = "";
map<string, string> args;
@@ -105,17 +106,16 @@ Alphabet* SequenceApplicationTools::getAlphabet(
flag = 2;
}
-
if (alphabet == "Binary")
chars = new BinaryAlphabet();
else if (alphabet == "DNA")
{
- bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, false);
+ bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, warn + 1);
chars = new DNA(mark);
}
else if (alphabet == "RNA")
{
- bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, false);
+ bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, warn + 1);
chars = new RNA(mark);
}
else if (alphabet == "Protein")
@@ -126,6 +126,8 @@ Alphabet* SequenceApplicationTools::getAlphabet(
{
if (args.find("letter") == args.end())
throw Exception("Missing 'letter' argument in Codon :" + alphabet);
+ if (args.find("type") != args.end())
+ throw Exception("'type' argument in Codon is deprecated and has been superseded by the 'genetic_code' option.");
string alphnDesc = ApplicationTools::getStringParameter("letter", args, "RNA");
string alphn;
@@ -135,31 +137,19 @@ Alphabet* SequenceApplicationTools::getAlphabet(
NucleicAlphabet* pnalph;
if (alphn == "RNA")
{
- bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, false);
+ bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, warn + 1);
pnalph = new RNA(mark);
}
else if (alphn == "DNA")
{
- bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, false);
+ bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, warn + 1);
pnalph = new DNA(mark);
}
else
throw Exception("Alphabet not known in Codon : " + alphn);
- string type = ApplicationTools::getStringParameter("type", args, "Standard");
-
- if (type == "EchinodermMitochondrial")
- chars = new EchinodermMitochondrialCodonAlphabet(pnalph);
- else if (type == "InvertebrateMitochondrial")
- chars = new InvertebrateMitochondrialCodonAlphabet(pnalph);
- else if (type == "Standard")
- chars = new StandardCodonAlphabet(pnalph);
- else if (type == "VertebrateMitochondrial")
- chars = new VertebrateMitochondrialCodonAlphabet(pnalph);
- else if (type == "YeastMitochondrial")
- chars = new YeastMitochondrialCodonAlphabet(pnalph);
- else
- throw Exception("Unknown Alphabet : " + alphabet);
+
+ chars = new CodonAlphabet(pnalph);
alphabet = alphabet + "(" + alphn + ")";
}
else
@@ -199,16 +189,20 @@ GeneticCode* SequenceApplicationTools::getGeneticCode(
const string& description) throw (Exception)
{
GeneticCode* geneCode;
+ // Code names are looked up as substrings of the description, whereas the
+ // numeric identifiers must match the whole description: with a substring
+ // test, "13" is captured by the earlier "1" (Standard) branch, and any
+ // unsupported number containing one of these digits is silently accepted.
+ // NOTE(review): the numbers look like NCBI translation table ids - confirm.
- if (description.find("EchinodermMitochondrial") != string::npos)
+ if (description.find("EchinodermMitochondrial") != string::npos || description == "9")
geneCode = new EchinodermMitochondrialGeneticCode(alphabet);
- else if (description.find("InvertebrateMitochondrial") != string::npos)
+ else if (description.find("InvertebrateMitochondrial") != string::npos || description == "5")
geneCode = new InvertebrateMitochondrialGeneticCode(alphabet);
- else if (description.find("Standard") != string::npos)
+ else if (description.find("Standard") != string::npos || description == "1")
geneCode = new StandardGeneticCode(alphabet);
- else if (description.find("VertebrateMitochondrial") != string::npos)
+ else if (description.find("VertebrateMitochondrial") != string::npos || description == "2")
geneCode = new VertebrateMitochondrialGeneticCode(alphabet);
- else if (description.find("YeastMitochondrial") != string::npos)
+ else if (description.find("YeastMitochondrial") != string::npos || description == "3")
geneCode = new YeastMitochondrialGeneticCode(alphabet);
+ else if (description.find("AscidianMitochondrial") != string::npos || description == "13")
+ geneCode = new AscidianMitochondrialGeneticCode(alphabet);
+ else if (description.find("MoldMitochondrial") != string::npos || description == "4")
+ geneCode = new MoldMitochondrialGeneticCode(alphabet);
else
throw Exception("Unknown GeneticCode: " + description);
return geneCode;
@@ -236,11 +230,12 @@ SequenceContainer* SequenceApplicationTools::getSequenceContainer(
map<string, string>& params,
const string& suffix,
bool suffixIsOptional,
- bool verbose)
+ bool verbose,
+ int warn)
{
- string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional);
- string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional);
- BppOSequenceReaderFormat bppoReader(verbose);
+ string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional, "none", warn);
+ string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional, warn);
+ BppOSequenceReaderFormat bppoReader(warn);
auto_ptr<ISequence> iSeq(bppoReader.read(sequenceFormat));
if (verbose)
{
@@ -259,11 +254,12 @@ VectorSiteContainer* SequenceApplicationTools::getSiteContainer(
map<string, string>& params,
const string& suffix,
bool suffixIsOptional,
- bool verbose)
+ bool verbose,
+ int warn)
{
- string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional);
- string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional);
- BppOAlignmentReaderFormat bppoReader(verbose);
+ string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional, "none", warn);
+ string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional, warn);
+ BppOAlignmentReaderFormat bppoReader(warn);
auto_ptr<IAlignment> iAln(bppoReader.read(sequenceFormat));
map<string, string> args(bppoReader.getUnparsedArguments());
if (verbose)
@@ -298,11 +294,13 @@ VectorSiteContainer* SequenceApplicationTools::getSiteContainer(
else
sites = sites2;
+
+
// Look for site selection:
if (iAln->getFormatName() == "MASE file")
{
// getting site set:
- string siteSet = ApplicationTools::getStringParameter("siteSelection", args, "none", suffix, suffixIsOptional, false);
+ string siteSet = ApplicationTools::getStringParameter("siteSelection", args, "none", suffix, suffixIsOptional, warn + 1);
if (siteSet != "none")
{
VectorSiteContainer* selectedSites;
@@ -324,6 +322,62 @@ VectorSiteContainer* SequenceApplicationTools::getSiteContainer(
sites = selectedSites;
}
}
+ else
+ {
+   // Generic site selection, driven by the 'input.site.selection' option:
+   //  - "none" (default): all sites are kept;
+   //  - a list of site positions readable by
+   //    NumCalcApplicationTools::seqFromString. Positions are 1-based and a
+   //    negative position counts from the end (-1 is the last site);
+   //  - "Sample(n=<size>, replace=<bool>)": n site indices are drawn with
+   //    RandomTools::getSample (n defaults to the number of sites, replace
+   //    to false).
+   size_t nbSites = sites->getNumberOfSites();
+
+   string siteSet = ApplicationTools::getStringParameter("input.site.selection", params, "none", suffix, suffixIsOptional, warn + 1);
+
+   VectorSiteContainer* selectedSites = 0;
+   if (siteSet != "none")
+   {
+     vector<size_t> vSite;
+     try
+     {
+       vector<int> vSite1 = NumCalcApplicationTools::seqFromString(siteSet);
+       const int maxPos = static_cast<int>(nbSites);
+       for (size_t i = 0; i < vSite1.size(); ++i)
+       {
+         // Map a negative position onto [1, nbSites]: -1 -> nbSites.
+         int x = (vSite1[i] >= 0 ? vSite1[i] : maxPos + vSite1[i] + 1);
+         // Position 0 (or anything outside the alignment) must be rejected
+         // here: x - 1 would otherwise wrap around once cast to size_t.
+         if (x < 1 || x > maxPos)
+           throw Exception("SequenceApplicationTools::getSiteContainer(). Incorrect site index: " + TextTools::toString(vSite1[i]));
+         vSite.push_back(static_cast<size_t>(x - 1));
+       }
+       selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
+     }
+     catch (Exception& e)
+     {
+       // The description is not a usable list of positions: try to read it
+       // as a selection procedure instead.
+       string seln;
+       map<string, string> selArgs;
+       KeyvalTools::parseProcedure(siteSet, seln, selArgs);
+       if (seln == "Sample")
+       {
+         size_t n = ApplicationTools::getParameter<size_t>("n", selArgs, nbSites, "", true, warn + 1);
+         bool replace = ApplicationTools::getBooleanParameter("replace", selArgs, false, "", true, warn + 1);
+
+         vSite.resize(n);
+         vector<size_t> vPos;
+         for (size_t p = 0; p < nbSites; ++p)
+           vPos.push_back(p);
+
+         RandomTools::getSample(vPos, vSite, replace);
+
+         selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
+         if (replace)
+           selectedSites->reindexSites();
+       }
+       else
+       {
+         // Neither a valid position list nor a known procedure: re-throw the
+         // original error rather than falling through with a null selection,
+         // which would delete 'sites' and return a null container.
+         throw;
+       }
+     }
+
+     if (verbose)
+       ApplicationTools::displayResult("Selected sites", TextTools::toString(siteSet));
+
+     if (selectedSites && (selectedSites->getNumberOfSites() == 0))
+     {
+       throw Exception("Site set '" + siteSet + "' is empty.");
+     }
+     delete sites;
+     sites = selectedSites;
+   }
+ }
return sites;
}
@@ -335,19 +389,20 @@ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse(
string suffix,
bool suffixIsOptional,
bool gapAsUnknown,
- bool verbose)
+ bool verbose,
+ int warn)
{
// Fully resolved sites, i.e. without jokers and gaps:
SiteContainer* sitesToAnalyse;
VectorSiteContainer* sitesToAnalyse2;
- string option = ApplicationTools::getStringParameter("input.sequence.sites_to_use", params, "complete", suffix, suffixIsOptional);
+ string option = ApplicationTools::getStringParameter("input.sequence.sites_to_use", params, "complete", suffix, suffixIsOptional, warn);
if (verbose)
ApplicationTools::displayResult("Sites to use", option);
if (option == "all")
{
sitesToAnalyse = new VectorSiteContainer(allSites);
- string maxGapOption = ApplicationTools::getStringParameter("input.sequence.max_gap_allowed", params, "100%", suffix, suffixIsOptional);
+ string maxGapOption = ApplicationTools::getStringParameter("input.sequence.max_gap_allowed", params, "100%", suffix, suffixIsOptional, warn);
if (maxGapOption[maxGapOption.size() - 1] == '%')
{
@@ -390,9 +445,9 @@ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse(
}
}
- string maxUnresolvedOption = ApplicationTools::getStringParameter("input.sequence.max_unresolved_allowed", params, "100%", suffix, suffixIsOptional);
+ string maxUnresolvedOption = ApplicationTools::getStringParameter("input.sequence.max_unresolved_allowed", params, "100%", suffix, suffixIsOptional, warn);
- size_t sAlph = sitesToAnalyse->getAlphabet()->getSize();
+ int sAlph = static_cast<int>(sitesToAnalyse->getAlphabet()->getSize());
if (maxUnresolvedOption[maxUnresolvedOption.size() - 1] == '%')
{
@@ -408,7 +463,7 @@ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse(
map<int, double> freq;
SiteTools::getFrequencies(sitesToAnalyse->getSite(i - 1), freq);
double x = 0;
- for (unsigned int l = 0; l < sAlph; l++)
+ for (int l = 0; l < sAlph; ++l)
{
x += freq[l];
}
@@ -434,7 +489,7 @@ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse(
map<int, size_t> counts;
SiteTools::getCounts(sitesToAnalyse->getSite(i - 1), counts);
size_t x = 0;
- for (int l = 0; l < static_cast<int>(sAlph); l++)
+ for (int l = 0; l < sAlph; l++)
{
x += counts[l];
}
@@ -471,15 +526,18 @@ VectorSiteContainer* SequenceApplicationTools::getSitesToAnalyse(
throw Exception("Option '" + option + "' unknown in parameter 'sequence.sites_to_use'.");
}
- if (AlphabetTools::isCodonAlphabet(sitesToAnalyse->getAlphabet()))
+ const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(sitesToAnalyse->getAlphabet());
+ if (ca)
{
- option = ApplicationTools::getStringParameter("input.sequence.remove_stop_codons", params, "no", suffix, true);
+ option = ApplicationTools::getStringParameter("input.sequence.remove_stop_codons", params, "no", suffix, true, warn);
if ((option != "") && verbose)
ApplicationTools::displayResult("Remove Stop Codons", option);
if (option == "yes")
{
- sitesToAnalyse2 = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::removeStopCodonSites(*sitesToAnalyse));
+ string codeDesc = ApplicationTools::getStringParameter("genetic_code", params, "Standard", "", true, warn);
+ auto_ptr<GeneticCode> gCode(getGeneticCode(ca->getNucleicAlphabet(), codeDesc));
+ sitesToAnalyse2 = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::removeStopCodonSites(*sitesToAnalyse, *gCode));
delete sitesToAnalyse;
}
else
@@ -497,11 +555,12 @@ void SequenceApplicationTools::writeSequenceFile(
const SequenceContainer& sequences,
map<string, string>& params,
const string& suffix,
- bool verbose)
+ bool verbose,
+ int warn)
{
- string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false);
- string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, true);
- BppOSequenceWriterFormat bppoWriter(verbose);
+ string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false, "none", warn);
+ string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, warn);
+ BppOSequenceWriterFormat bppoWriter(warn);
auto_ptr<OSequence> oSeq(bppoWriter.read(sequenceFormat));
if (verbose)
{
@@ -519,11 +578,12 @@ void SequenceApplicationTools::writeAlignmentFile(
const SiteContainer& sequences,
map<string, string>& params,
const string& suffix,
- bool verbose)
+ bool verbose,
+ int warn)
{
- string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false);
- string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, true);
- BppOAlignmentWriterFormat bppoWriter(verbose);
+ string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false, "none", warn);
+ string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, warn);
+ BppOAlignmentWriterFormat bppoWriter(warn);
auto_ptr<OAlignment> oAln(bppoWriter.read(sequenceFormat));
if (verbose)
{
diff --git a/src/Bpp/Seq/App/SequenceApplicationTools.h b/src/Bpp/Seq/App/SequenceApplicationTools.h
index d94920b..ae2ecbe 100644
--- a/src/Bpp/Seq/App/SequenceApplicationTools.h
+++ b/src/Bpp/Seq/App/SequenceApplicationTools.h
@@ -6,37 +6,37 @@
//
/*
- Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
- This software is a computer program whose purpose is to provide classes
- for sequences analysis.
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
- This software is governed by the CeCILL license under French law and
- abiding by the rules of distribution of free software. You can use,
- modify and/ or redistribute the software under the terms of the CeCILL
- license as circulated by CEA, CNRS and INRIA at the following URL
- "http://www.cecill.info".
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
- As a counterpart to the access to the source code and rights to copy,
- modify and redistribute granted by the license, users are provided only
- with a limited warranty and the software's author, the holder of the
- economic rights, and the successive licensors have only limited
- liability.
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
- In this respect, the user's attention is drawn to the risks associated
- with loading, using, modifying and/or developing or reproducing the
- software by the user in light of its specific status of free software,
- that may mean that it is complicated to manipulate, and that also
- therefore means that it is reserved for developers and experienced
- professionals having in-depth computer knowledge. Users are therefore
- encouraged to load and test the software's suitability as regards their
- requirements in conditions enabling the security of their systems and/or
- data to be ensured and, more generally, to use and operate it in the
- same conditions as regards security.
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
- The fact that you are presently reading this means that you have had
- knowledge of the CeCILL license and that you accept its terms.
- */
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+*/
#ifndef _SEQUENCEAPPLICATIONTOOLS_H_
#define _SEQUENCEAPPLICATIONTOOLS_H_
@@ -67,202 +67,221 @@ namespace bpp
*
* @see ApplicationTools
*/
-class SequenceApplicationTools
-{
-public:
- SequenceApplicationTools() {}
- virtual ~SequenceApplicationTools() {}
-
-public:
- /**
- * @brief Build an Alphabet object according to options.
- *
- * Options used are:
- * - alphabet = [DNA|RNA|Protein], the alphabet type to use.
- * = [DNA|RNA|Protein](length=n) a word-alphabet of
- * words with length n
- * = [EchinodermMitochondrialCodonAlphabet
- * | InvertebrateMitochondrialCodonAlphabet
- * | InvertebrateMitochondrialCodonAlphabet
- * | StandardCodonAlphabet
- * | VertebrateMitochondrialCodonAlphabet]([alphn=NA|RNA])
- * a codon-alphabet
- *
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param suffixIsOptional Tell if the suffix is absolutely required.
- * @param verbose Print some info to the 'message' output stream.
- * @param allowGeneric Tell if generic alphabets can be used.
- * @return A new Alphabet object according to options specified.
- */
- static Alphabet* getAlphabet(
- std::map<std::string, std::string>& params,
- const std::string& suffix = "",
- bool suffixIsOptional = true,
- bool verbose = true,
- bool allowGeneric = false) throw (Exception);
+ class SequenceApplicationTools
+ {
+ public:
+ SequenceApplicationTools() {}
+ virtual ~SequenceApplicationTools() {}
- /**
- * @brief Build a GeneticCode object according to options.
- *
- * @param alphabet pointer to the NucleicAlphabet
- * @param description for the name of the GeneticCode:
- * [EchinodermMitochondrialGeneticCode
- * | InvertebrateMitochondrialGeneticCode
- * | InvertebrateMitochondrialGeneticCode
- * | StandardGeneticCode
- * | VertebrateMitochondrialGeneticCode]
- * @return A new GeneticCode object
- * @throw Exception in case of bad description.
- */
- static GeneticCode* getGeneticCode(const NucleicAlphabet* alphabet, const std::string& description) throw (Exception);
+ public:
+ /**
+ * @brief Build an Alphabet object according to options.
+ *
+ * Options used are:
+ * - alphabet = [Binary|DNA|RNA|Protein], the alphabet type to use
+ * (default: DNA). DNA and RNA accept an optional boolean
+ * 'bangAsGap' argument, forwarded to their constructor.
+ * = [DNA|RNA|Protein](length=n) a word-alphabet of
+ * words with length n
+ * = Codon(letter=[DNA|RNA]) a codon-alphabet built on the
+ * given nucleic alphabet. The 'letter' argument is
+ * mandatory. The former 'type' argument is rejected with
+ * an exception: the genetic code is no longer part of the
+ * alphabet and is selected through the 'genetic_code'
+ * option (see getGeneticCode()).
+ *
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param suffixIsOptional Tell if the suffix is absolutely required.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param allowGeneric Tell if generic alphabets can be used.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ * @return A new Alphabet object according to options specified.
+ * @throw Exception if the alphabet description is invalid (for instance a
+ * Codon alphabet without 'letter' or with an unknown nucleic alphabet).
+ */
+ static Alphabet* getAlphabet(
+ std::map<std::string, std::string>& params,
+ const std::string& suffix = "",
+ bool suffixIsOptional = true,
+ bool verbose = true,
+ bool allowGeneric = false,
+ int warn = 1) throw (Exception);
+ /**
+ * @brief Build a GeneticCode object according to options.
+ *
+ * @param alphabet pointer to the NucleicAlphabet
+ * @param description for the name of the GeneticCode. The description is
+ * searched for one of the following names (the number in parentheses
+ * is also tested as an alternative identifier):
+ * [EchinodermMitochondrial (9)
+ * | InvertebrateMitochondrial (5)
+ * | Standard (1)
+ * | VertebrateMitochondrial (2)
+ * | YeastMitochondrial (3)
+ * | AscidianMitochondrial (13)
+ * | MoldMitochondrial (4)]
+ * @return A new GeneticCode object
+ * @throw Exception in case of bad description.
+ */
+ static GeneticCode* getGeneticCode(const NucleicAlphabet* alphabet, const std::string& description) throw (Exception);
- /**
- * @brief Build a AlphabetIndex1 object for a given alphabet.
- *
- * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed.
- * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax.
- * @param message To be displayed when parsing.
- * @param verbose Tell if some info should be displayed while parsing.
- * @return A new AlphabetIndex1 object.
- * @throw Exception in case of bad description.
- */
- static AlphabetIndex1* getAlphabetIndex1(const Alphabet* alphabet, const std::string& description, const std::string& message = "Alphabet distance:", bool verbose = true) throw (Exception);
+ /**
+ * @brief Build an AlphabetIndex1 object for a given alphabet.
+ *
+ * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed.
+ * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax.
+ * @param message To be displayed when parsing (default: "Alphabet distance:").
+ * @param verbose Tell if some info should be displayed while parsing.
+ * @return A new AlphabetIndex1 object.
+ * @throw Exception in case of bad description.
+ */
+ static AlphabetIndex1* getAlphabetIndex1(
+ const Alphabet* alphabet,
+ const std::string& description,
+ const std::string& message = "Alphabet distance:",
+ bool verbose = true) throw (Exception);
- /**
- * @brief Build a AlphabetIndex2 object for a given alphabet.
- *
- * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed.
- * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax.
- * @param message To be displayed when parsing.
- * @return A new AlphabetIndex2 object.
- * @param verbose Tell if some info should be displayed while parsing.
- * @throw Exception in case of bad description.
- */
- static AlphabetIndex2* getAlphabetIndex2(const Alphabet* alphabet, const std::string& description, const std::string& message = "Alphabet distance:", bool verbose = true) throw (Exception);
+ /**
+ * @brief Build an AlphabetIndex2 object for a given alphabet.
+ *
+ * @param alphabet The alphabet to use. This is currently only used for assessing the type of distance allowed.
+ * @param description Which distance to use. See the Bio++ Program Suite reference manual for a description of the syntax.
+ * @param message To be displayed when parsing (default: "Alphabet distance:").
+ * @param verbose Tell if some info should be displayed while parsing.
+ * @return A new AlphabetIndex2 object.
+ * @throw Exception in case of bad description.
+ */
+ static AlphabetIndex2* getAlphabetIndex2(
+ const Alphabet* alphabet,
+ const std::string& description,
+ const std::string& message = "Alphabet distance:",
+ bool verbose = true) throw (Exception);
- /**
- * @brief Build a SequenceContainer object according to options.
- *
- * The sequences do not have to be aligned.
- * The supported sequence formats are Fasta, DCSE, Clustal, Mase, Phylip and GenBank.
- *
- * See the Bio++ program suite manual for a full description of the syntax.
- *
- * @param alpha The alphabet to use in the container.
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param suffixIsOptional Tell if the suffix is absolutely required.
- * @param verbose Print some info to the 'message' output stream.
- * @return A new VectorSequenceContainer object according to options specified.
- * @see getSiteContainer to read an alignment.
- */
+ /**
+ * @brief Build a SequenceContainer object according to options.
+ *
+ * The sequences do not have to be aligned.
+ * The supported sequence formats are Fasta, DCSE, Clustal, Mase, Phylip and GenBank.
+ *
+ * Options used are:
+ * - input.sequence.file: path of the sequence file to read,
+ * - input.sequence.format: description of the file format (default: Fasta()).
+ *
+ * See the Bio++ program suite manual for a full description of the syntax.
+ *
+ * @param alpha The alphabet to use in the container.
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param suffixIsOptional Tell if the suffix is absolutely required.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ * @return A new VectorSequenceContainer object according to options specified.
+ * @see getSiteContainer to read an alignment.
+ */
- static SequenceContainer* getSequenceContainer(
- const Alphabet* alpha,
- std::map<std::string, std::string>& params,
- const std::string& suffix = "",
- bool suffixIsOptional = true,
- bool verbose = true);
+ static SequenceContainer* getSequenceContainer(
+ const Alphabet* alpha,
+ std::map<std::string, std::string>& params,
+ const std::string& suffix = "",
+ bool suffixIsOptional = true,
+ bool verbose = true,
+ int warn = 1);
- /**
- * @brief Build a SiteContainer object according to options.
- *
- * Sequences in file must be aligned.
- * The supported sequence formats are Fasta, DCSE, Clustal, Mase and Phylip.
- *
- * See the Bio++ program suite manual for a full description of the syntax.
- *
- * @param alpha The alphabet to use in the container.
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param suffixIsOptional Tell if the suffix is absolutely required.
- * @param verbose Print some info to the 'message' output stream.
- * @return A new VectorSiteContainer object according to options specified.
- */
- static VectorSiteContainer* getSiteContainer(
- const Alphabet* alpha,
- std::map<std::string, std::string>& params,
- const std::string& suffix = "",
- bool suffixIsOptional = true,
- bool verbose = true);
+ /**
+ * @brief Build a SiteContainer object according to options.
+ *
+ * Sequences in file must be aligned.
+ * The supported sequence formats are Fasta, DCSE, Clustal, Mase and Phylip.
+ *
+ * Options used are:
+ * - input.sequence.file: path of the alignment file to read,
+ * - input.sequence.format: description of the file format (default: Fasta()).
+ * For MASE files, a 'siteSelection' argument of the format description
+ * names the site set to keep,
+ * - input.site.selection (all formats but MASE): 'none' (default), a list
+ * of site positions (positive positions are 1-based), or a
+ * Sample(n=..., replace=...) procedure drawing sites at random.
+ *
+ * See the Bio++ program suite manual for a full description of the syntax.
+ *
+ * @param alpha The alphabet to use in the container.
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param suffixIsOptional Tell if the suffix is absolutely required.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ * @return A new VectorSiteContainer object according to options specified.
+ */
+ static VectorSiteContainer* getSiteContainer(
+ const Alphabet* alpha,
+ std::map<std::string, std::string>& params,
+ const std::string& suffix = "",
+ bool suffixIsOptional = true,
+ bool verbose = true,
+ int warn = 1);
- /**
- * @brief Retrieves sites suitable for the analysis.
- *
- * Options used are:
- * - sequence.sites_to_use = [all|complete|nogap].
- *
- * If the 'complete' option is used, only fully resolve site will be taken
- * into account.
- * If the 'nogap' option is used, only sites without gap will be taken into
- * account.
- * If 'gapAsUnknown' is set to true and the all option is selected, gaps will
- * be changed to 'unknown' character is sequences.
- *
- * - sequence.max_gap_allowed = [57%|30]
- * If a % sign fallow the number, it is taken to be a frequence (in percent).
- * This specify the maximum amount of gaps allowed for each site.
- * Sites not satisfying this amount will be removed.
- * A value of 100% will remove all gap-only sites, a value >100% will keep all sites.
- *
- * @param allSites The site container from which sites must be retrieved.
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param suffixIsOptional Tell if the suffix is absolutely required.
- * @param gapAsUnknown Convert gaps to unknown characters.
- * @param verbose Print some info to the 'message' output stream.
- * @return A new VectorSiteContainer object containing sites of interest.
- */
- static VectorSiteContainer* getSitesToAnalyse(
- const SiteContainer& allSites,
- std::map<std::string, std::string>& params,
- std::string suffix = "",
- bool suffixIsOptional = true,
- bool gapAsUnknown = true,
- bool verbose = true);
+ /**
+ * @brief Retrieves sites suitable for the analysis.
+ *
+ * Options used are:
+ * - input.sequence.sites_to_use = [all|complete|nogap] (default: complete).
+ *
+ * If the 'complete' option is used, only fully resolved sites will be taken
+ * into account.
+ * If the 'nogap' option is used, only sites without gap will be taken into
+ * account.
+ * If 'gapAsUnknown' is set to true and the 'all' option is selected, gaps will
+ * be changed to the 'unknown' character in sequences.
+ *
+ * - input.sequence.max_gap_allowed = [57%|30] (default: 100%)
+ * If a % sign follows the number, it is taken to be a frequency (in percent).
+ * This specifies the maximum amount of gaps allowed for each site.
+ * Sites not satisfying this amount will be removed.
+ * A value of 100% will remove all gap-only sites, a value >100% will keep all sites.
+ *
+ * - input.sequence.max_unresolved_allowed = [57%|30] (default: 100%)
+ * Same syntax as max_gap_allowed, applied to unresolved characters.
+ *
+ * - input.sequence.remove_stop_codons = [yes|no] (default: no)
+ * Only read for codon alphabets. If 'yes', the genetic code named by the
+ * 'genetic_code' option (default: Standard) is built and passed to
+ * SiteContainerTools::removeStopCodonSites.
+ *
+ * @param allSites The site container from which sites must be retrieved.
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param suffixIsOptional Tell if the suffix is absolutely required.
+ * @param gapAsUnknown Convert gaps to unknown characters.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ * @return A new VectorSiteContainer object containing sites of interest.
+ */
+ static VectorSiteContainer* getSitesToAnalyse(
+ const SiteContainer& allSites,
+ std::map<std::string, std::string>& params,
+ std::string suffix = "",
+ bool suffixIsOptional = true,
+ bool gapAsUnknown = true,
+ bool verbose = true,
+ int warn = 1);
- /**
- * @brief Write a sequence file according to options.
- *
- * The supported sequence formats are Fasta and Mase.
- *
- * See the Bio++ program suite manual for a full description of the syntax.
- *
- * @see writeSequenceFile(SiteContainer) for writing alignments, with more output formats.
- *
- * @param sequences The sequences to write.
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param verbose Print some info to the 'message' output stream.
- */
- static void writeSequenceFile(
- const SequenceContainer& sequences,
- std::map<std::string, std::string>& params,
- const std::string& suffix = "",
- bool verbose = true);
+ /**
+ * @brief Write a sequence file according to options.
+ *
+ * The supported sequence formats are Fasta and Mase.
+ *
+ * Options used are:
+ * - output.sequence.file: path of the file to write,
+ * - output.sequence.format: description of the file format (default: Fasta).
+ *
+ * See the Bio++ program suite manual for a full description of the syntax.
+ *
+ * @see writeAlignmentFile for writing alignments (SiteContainer), with more output formats.
+ *
+ * @param sequences The sequences to write.
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ */
+ static void writeSequenceFile(
+ const SequenceContainer& sequences,
+ std::map<std::string, std::string>& params,
+ const std::string& suffix = "",
+ bool verbose = true,
+ int warn = 1);
- /**
- * @brief Write a sequence alignment file according to options.
- *
- * The supported sequence formats are Fasta, Mase and Phylip.
- *
- * See the Bio++ program suite manual for a full description of the syntax.
- *
- * @param sequences The aligned sequences to write.
- * @param params The attribute map where options may be found.
- * @param suffix A suffix to be applied to each attribute name.
- * @param verbose Print some info to the 'message' output stream.
- */
- static void writeAlignmentFile(
- const SiteContainer& sequences,
- std::map<std::string, std::string>& params,
- const std::string& suffix = "",
- bool verbose = true);
-};
+ /**
+ * @brief Write a sequence alignment file according to options.
+ *
+ * The supported sequence formats are Fasta, Mase and Phylip.
+ *
+ * Options used are:
+ * - output.sequence.file: path of the file to write,
+ * - output.sequence.format: description of the file format (default: Fasta).
+ *
+ * See the Bio++ program suite manual for a full description of the syntax.
+ *
+ * @see writeSequenceFile for writing sequences that are not aligned.
+ *
+ * @param sequences The aligned sequences to write.
+ * @param params The attribute map where options may be found.
+ * @param suffix A suffix to be applied to each attribute name.
+ * @param verbose Print some info to the 'message' output stream.
+ * @param warn Set the warning level (0: always display warnings, >0 display warnings on demand).
+ */
+ static void writeAlignmentFile(
+ const SiteContainer& sequences,
+ std::map<std::string, std::string>& params,
+ const std::string& suffix = "",
+ bool verbose = true,
+ int warn = 1);
+ };
} // end of namespace bpp.
#endif // _SEQUENCEAPPLICATIONTOOLS_H_
diff --git a/src/Bpp/Seq/CodonSiteTools.cpp b/src/Bpp/Seq/CodonSiteTools.cpp
index 9b7007f..c706a09 100644
--- a/src/Bpp/Seq/CodonSiteTools.cpp
+++ b/src/Bpp/Seq/CodonSiteTools.cpp
@@ -38,7 +38,6 @@
*/
#include "CodonSiteTools.h"
-#include "Alphabet/StandardCodonAlphabet.h"
#include "Alphabet/CodonAlphabet.h"
#include "Alphabet/DNA.h"
#include "Alphabet/AlphabetTools.h"
@@ -58,7 +57,7 @@ using namespace std;
/******************************************************************************/
-bool CodonSiteTools::hasGapOrStop(const Site& site) throw (AlphabetException)
+bool CodonSiteTools::hasGapOrStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
@@ -73,15 +72,14 @@ bool CodonSiteTools::hasGapOrStop(const Site& site) throw (AlphabetException)
/******************************************************************************/
-bool CodonSiteTools::hasStop(const Site& site) throw (AlphabetException)
+bool CodonSiteTools::hasStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::hasStop: alphabet is not CodonAlphabet", site.getAlphabet());
- const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
for (size_t i = 0; i < site.size(); i++)
{
- if (ca->isStop(site[i]))
+ if (gCode.isStop(site[i]))
return true;
}
return false;
@@ -127,14 +125,14 @@ bool CodonSiteTools::isMonoSitePolymorphic(const Site& site) throw (AlphabetExce
/******************************************************************************/
-bool CodonSiteTools::isSynonymousPolymorphic(const Site& site, const GeneticCode& gc)
+bool CodonSiteTools::isSynonymousPolymorphic(const Site& site, const GeneticCode& gCode)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::isSynonymousPolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
- if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet());
+ if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (site.size() == 0)
throw EmptySiteException("CodonSiteTools::isSynonymousPolymorphic: Incorrect specified site", &site);
@@ -145,10 +143,10 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
// Synonymous polymorphism checking
vector<int> prot;
- int first_aa = gc.translate(site[0]);
+ int first_aa = gCode.translate(site[0]);
for (size_t i = 1; i < site.size(); i++)
{
- int aa = gc.translate(site[i]);
+ int aa = gCode.translate(site[i]);
if (aa != first_aa)
return false;
}
@@ -157,7 +155,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
/******************************************************************************/
-Site* CodonSiteTools::generateCodonSiteWithoutRareVariant(const Site& site, double freqmin)
+Site* CodonSiteTools::generateCodonSiteWithoutRareVariant(const Site& site, const GeneticCode& gCode, double freqmin)
throw (AlphabetException, EmptySiteException)
{
// Alphabet checking
@@ -182,7 +180,7 @@ throw (AlphabetException, EmptySiteException)
int newcodon = -1;
for (map<int, double>::iterator it = freqcodon.begin(); it != freqcodon.end(); it++)
{
- if (it->second > freqmin && !ca->isStop(it->first))
+ if (it->second > freqmin && !gCode.isStop(it->first))
{
newcodon = it->first;
break;
@@ -233,9 +231,9 @@ size_t CodonSiteTools::numberOfDifferences(int i, int j, const CodonAlphabet& ca
/******************************************************************************/
-double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const GeneticCode& gc, bool minchange)
+double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const GeneticCode& gCode, bool minchange)
{
- const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gc.getSourceAlphabet());
+ const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
vector<int> ci = ca->getPositions(i);
vector<int> cj = ca->getPositions(j);
@@ -244,13 +242,13 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
case 0: return 0;
case 1:
{
- if (gc.areSynonymous(i, j))
+ if (gCode.areSynonymous(i, j))
return 1;
return 0;
}
case 2:
{
- if (gc.areSynonymous(i, j))
+ if (gCode.areSynonymous(i, j))
return 2;
vector<double> path(2, 0); // Vector of number of synonymous changes per path (2 here)
vector<double> weight(2, 1); // Weight to exclude path through stop codon
@@ -258,20 +256,20 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
{
int trans1 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NcNiNi et NcNjNj: NcNjNi, Nc = identical site
int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NcNiNi et NcNjNj: NcNiNj, Nc = identical site
- if (!ca->isStop(trans1))
+ if (!gCode.isStop(trans1))
{
- if (gc.areSynonymous(i, trans1))
+ if (gCode.areSynonymous(i, trans1))
path[0]++;
- if (gc.areSynonymous(trans1, j))
+ if (gCode.areSynonymous(trans1, j))
path[0]++;
}
else
weight[0] = 0;
- if (!ca->isStop(trans2))
+ if (!gCode.isStop(trans2))
{
- if (gc.areSynonymous(i, trans2))
+ if (gCode.areSynonymous(i, trans2))
path[1]++;
- if (gc.areSynonymous(trans2, j))
+ if (gCode.areSynonymous(trans2, j))
path[1]++;
}
else
@@ -281,20 +279,20 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
{
int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNcNi et NjNcNj: NjNcNi, Nc = identical site
int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NiNcNi et NjNcNj: NiNcNj, Nc = identical site
- if (!ca->isStop(trans1))
+ if (!gCode.isStop(trans1))
{
- if (gc.areSynonymous(i, trans1))
+ if (gCode.areSynonymous(i, trans1))
path[0]++;
- if (gc.areSynonymous(trans1, j))
+ if (gCode.areSynonymous(trans1, j))
path[0]++;
}
else
weight[0] = 0;
- if (!ca->isStop(trans2))
+ if (!gCode.isStop(trans2))
{
- if (gc.areSynonymous(i, trans2))
+ if (gCode.areSynonymous(i, trans2))
path[1]++;
- if (gc.areSynonymous(trans2, j))
+ if (gCode.areSynonymous(trans2, j))
path[1]++;
}
else
@@ -304,20 +302,20 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
{
int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NjNiNc, Nc = identical site
int trans2 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NiNjNc, Nc = identical site
- if (!ca->isStop(trans1))
+ if (!gCode.isStop(trans1))
{
- if (gc.areSynonymous(i, trans1))
+ if (gCode.areSynonymous(i, trans1))
path[0]++;
- if (gc.areSynonymous(trans1, j))
+ if (gCode.areSynonymous(trans1, j))
path[0]++;
}
else
weight[0] = 0;
- if (!ca->isStop(trans2))
+ if (!gCode.isStop(trans2))
{
- if (gc.areSynonymous(i, trans2))
+ if (gCode.areSynonymous(i, trans2))
path[1]++;
- if (gc.areSynonymous(trans2, j))
+ if (gCode.areSynonymous(trans2, j))
path[1]++;
}
else
@@ -347,26 +345,26 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
int trans101 = ca->getCodon(cj[0], ci[1], cj[2]);
int trans011 = ca->getCodon(ci[0], cj[1], cj[2]);
// Paths
- if (!ca->isStop(trans100))
+ if (!gCode.isStop(trans100))
{
- if (gc.areSynonymous(i, trans100))
+ if (gCode.areSynonymous(i, trans100))
{
path[0]++; path[1]++;
}
- if (!ca->isStop(trans110))
+ if (!gCode.isStop(trans110))
{
- if (gc.areSynonymous(trans100, trans110))
+ if (gCode.areSynonymous(trans100, trans110))
path[0]++;
- if (gc.areSynonymous(trans110, j))
+ if (gCode.areSynonymous(trans110, j))
path[0]++;
}
else
weight[0] = 0;
- if (!ca->isStop(trans101))
+ if (!gCode.isStop(trans101))
{
- if (gc.areSynonymous(trans100, trans101))
+ if (gCode.areSynonymous(trans100, trans101))
path[1]++;
- if (gc.areSynonymous(trans101, j))
+ if (gCode.areSynonymous(trans101, j))
path[1]++;
}
else
@@ -376,26 +374,26 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
{
weight[0] = 0; weight[1] = 0;
}
- if (!ca->isStop(trans010))
+ if (!gCode.isStop(trans010))
{
- if (gc.areSynonymous(i, trans010))
+ if (gCode.areSynonymous(i, trans010))
{
path[2]++; path[3]++;
}
- if (!ca->isStop(trans110))
+ if (!gCode.isStop(trans110))
{
- if (gc.areSynonymous(trans010, trans110))
+ if (gCode.areSynonymous(trans010, trans110))
path[2]++;
- if (gc.areSynonymous(trans110, j))
+ if (gCode.areSynonymous(trans110, j))
path[2]++;
}
else
weight[2] = 0;
- if (!ca->isStop(trans011))
+ if (!gCode.isStop(trans011))
{
- if (gc.areSynonymous(trans010, trans011))
+ if (gCode.areSynonymous(trans010, trans011))
path[3]++;
- if (gc.areSynonymous(trans011, j))
+ if (gCode.areSynonymous(trans011, j))
path[3]++;
}
else
@@ -405,26 +403,26 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
{
weight[2] = 0; weight[3] = 0;
}
- if (!ca->isStop(trans001))
+ if (!gCode.isStop(trans001))
{
- if (gc.areSynonymous(i, trans001))
+ if (gCode.areSynonymous(i, trans001))
{
path[4]++; path[5]++;
}
- if (!ca->isStop(trans101))
+ if (!gCode.isStop(trans101))
{
- if (gc.areSynonymous(trans001, trans101))
+ if (gCode.areSynonymous(trans001, trans101))
path[4]++;
- if (gc.areSynonymous(trans101, j))
+ if (gCode.areSynonymous(trans101, j))
path[4]++;
}
else
weight[4] = 0;
- if (!ca->isStop(trans011))
+ if (!gCode.isStop(trans011))
{
- if (gc.areSynonymous(trans001, trans011))
+ if (gCode.areSynonymous(trans001, trans011))
path[5]++;
- if (gc.areSynonymous(trans011, j))
+ if (gCode.areSynonymous(trans011, j))
path[5]++;
}
else
@@ -452,14 +450,14 @@ double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const Genetic
/******************************************************************************/
-double CodonSiteTools::piSynonymous(const Site& site, const GeneticCode& gc, bool minchange)
+double CodonSiteTools::piSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::piSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
- if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet());
+ if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (site.size() == 0)
throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
@@ -475,7 +473,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
{
- pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gc, minchange));
+ pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange));
}
}
size_t n = site.size();
@@ -484,14 +482,14 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
/******************************************************************************/
-double CodonSiteTools::piNonSynonymous(const Site& site, const GeneticCode& gc, bool minchange)
+double CodonSiteTools::piNonSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::piNonSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
- if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet());
+ if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (site.size() == 0)
throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
@@ -499,7 +497,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
// General polymorphism checking
if (SiteTools::isConstant(site))
return 0;
- if (isSynonymousPolymorphic(site, gc))
+ if (isSynonymousPolymorphic(site, gCode))
return 0;
// Computation
map<int, double> freq;
@@ -511,7 +509,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
{
double nbtot = static_cast<double>(numberOfDifferences(it1->first, it2->first, *ca));
- double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gc, minchange);
+ double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange);
pi += (it1->second) * (it2->second) * (nbtot - nbsyn);
}
}
@@ -521,62 +519,55 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
/******************************************************************************/
-double CodonSiteTools::numberOfSynonymousPositions(int i, const GeneticCode& gc, double ratio) throw (Exception)
+double CodonSiteTools::numberOfSynonymousPositions(int i, const GeneticCode& gCode, double ratio) throw (Exception)
{
- try
+ const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
+ if (gCode.isStop(i))
+ return 0;
+ if (ca->isUnresolved(i))
+ return 0;
+ double nbsynpos = 0.0;
+ vector<int> codon = ca->getPositions(i);
+ int acid = gCode.translate(i);
+ for (size_t pos = 0; pos < 3; ++pos)
{
- const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gc.getSourceAlphabet());
- if (ca->isStop(i))
- return 0;
- if (ca->isUnresolved(i))
- return 0;
- double nbsynpos = 0.0;
- vector<int> codon = ca->getPositions(i);
- int acid = gc.translate(i);
- for (int pos = 0; pos < 3; pos++)
+ for (int an = 0; an < 4; ++an)
{
- for (int an = 0; an < 4; an++)
+ if (an == codon[pos])
+ continue;
+ vector<int> mutcodon = codon;
+ mutcodon[pos] = an;
+ int intcodon = ca->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
+ if (gCode.isStop(intcodon))
+ continue;
+ int altacid = gCode.translate(intcodon);
+ if (altacid == acid) // if synonymous
{
- if (an == codon[pos])
- continue;
- vector<int> mutcodon = codon;
- mutcodon[pos] = an;
- int intcodon = ca->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
- if (ca->isStop(intcodon))
- continue;
- int altacid = gc.translate(intcodon);
- if (altacid == acid) // if synonymous
+ if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) ||
+ ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2))) // if it is a transversion
{
- if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) ||
- ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2))) // if it is a transversion
- {
- nbsynpos = nbsynpos + 1 / (ratio + 2);
- }
- else // if transition
- {
- nbsynpos = nbsynpos + ratio / (ratio + 2);
- }
+ nbsynpos = nbsynpos + 1 / (ratio + 2);
+ }
+ else // if transition
+ {
+ nbsynpos = nbsynpos + ratio / (ratio + 2);
}
}
}
- return nbsynpos;
}
- catch (...)
- {} // !!!!! en cas d'exception, plante! il faudrait forwarder l'exception
- // This line is never reached but sends a warning if not there:
- return 0.;
+ return nbsynpos;
}
/******************************************************************************/
-double CodonSiteTools::meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gc, double ratio)
+double CodonSiteTools::meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gCode, double ratio)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::meanNumberOfSynonymousPositions: alphabet is not CodonAlphabet", site.getAlphabet());
- if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet());
+ if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (site.size() == 0)
throw EmptySiteException("CodonSiteTools::meanNumberOfSynonymousPositions: Incorrect specified site", &site);
@@ -587,14 +578,14 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
SiteTools::getFrequencies(site, freq);
for (map<int, double>::iterator it = freq.begin(); it != freq.end(); it++)
{
- NbSyn += (it->second) * numberOfSynonymousPositions(it->first, gc, ratio);
+ NbSyn += (it->second) * numberOfSynonymousPositions(it->first, gCode, ratio);
}
return NbSyn;
}
/******************************************************************************/
-size_t CodonSiteTools::numberOfSubsitutions(const Site& site, double freqmin)
+size_t CodonSiteTools::numberOfSubsitutions(const Site& site, const GeneticCode& gCode, double freqmin)
throw (AlphabetException, EmptySiteException)
{
// Alphabet checking
@@ -608,7 +599,7 @@ throw (AlphabetException, EmptySiteException)
return 0;
Site* newsite;
if (freqmin > 1. / static_cast<double>(site.size()))
- newsite = CodonSiteTools::generateCodonSiteWithoutRareVariant(site, freqmin);
+ newsite = CodonSiteTools::generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
else
newsite = new Site(site);
// Computation
@@ -639,14 +630,14 @@ throw (AlphabetException, EmptySiteException)
/******************************************************************************/
-size_t CodonSiteTools::numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gc, double freqmin)
+size_t CodonSiteTools::numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gCode, double freqmin)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
throw AlphabetException("CodonSiteTools::numberOfNonSynonymousSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet());
- if (typeid(site.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gc.getSourceAlphabet());
+ if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (site.size() == 0)
throw EmptySiteException("CodonSiteTools::numberOfNonSynonymousSubstitutions: Incorrect specified site", &site);
@@ -655,7 +646,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
return 0;
Site* newsite;
if (freqmin > 1. / static_cast<double>(site.size()))
- newsite = generateCodonSiteWithoutRareVariant(site, freqmin);
+ newsite = generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
else
newsite = new Site(site);
if (SiteTools::hasGap(*newsite))
@@ -674,7 +665,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
for (map<int, size_t>::iterator it2 = count.begin(); it2 != count.end(); it2++)
{
size_t Ntot = numberOfDifferences(it1->first, it2->first, *ca);
- size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gc, true);
+ size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gCode, true);
if (Nmin > Ntot - Ns && it1->first != it2->first)
Nmin = Ntot - Ns;
}
@@ -688,7 +679,7 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
/******************************************************************************/
-vector<size_t> CodonSiteTools::fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gc)
+vector<size_t> CodonSiteTools::fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gCode)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
{
// Alphabet checking
@@ -696,20 +687,20 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteIn)", siteIn.getAlphabet());
if (!AlphabetTools::isCodonAlphabet(siteOut.getAlphabet()))
throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteOut)", siteOut.getAlphabet());
- if (typeid(siteIn.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gc.getSourceAlphabet());
- if (typeid(siteOut.getAlphabet()) != typeid(gc.getSourceAlphabet()))
- throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gc.getSourceAlphabet());
+ if (!siteIn.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gCode.getSourceAlphabet());
+ if (!siteOut.getAlphabet()->equals(*gCode.getSourceAlphabet()))
+ throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gCode.getSourceAlphabet());
// Empty site checking
if (siteIn.size() == 0)
throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteIn);
if (siteOut.size() == 0)
throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteOut);
- const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gc.getSourceAlphabet());
+ const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
size_t Ntot = numberOfDifferences(i, j, *ca);
- size_t Ns = (size_t) numberOfSynonymousDifferences(i, j, gc, true);
+ size_t Ns = (size_t) numberOfSynonymousDifferences(i, j, gCode, true);
size_t Na = Ntot - Ns;
size_t Nfix = Ntot;
vector<int> pos1in, pos2in, pos3in, pos1out, pos2out, pos3out;
@@ -813,17 +804,17 @@ throw (AlphabetException, AlphabetMismatchException, EmptySiteException)
/******************************************************************************/
-bool CodonSiteTools::isFourFoldDegenerated(const Site& site, const GeneticCode& gc)
+bool CodonSiteTools::isFourFoldDegenerated(const Site& site, const GeneticCode& gCode)
{
if (!SiteTools::isConstant(site, true))
{
/** If non-synonymous mutation **/
- if (!(CodonSiteTools::isSynonymousPolymorphic(site, gc)))
+ if (!(CodonSiteTools::isSynonymousPolymorphic(site, gCode)))
return false;
for (size_t i = 0; i < site.size(); i++)
{
- if (!(gc.isFourFoldDegenerated(site.getValue(i))))
+ if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
{
return false;
}
@@ -833,7 +824,7 @@ bool CodonSiteTools::isFourFoldDegenerated(const Site& site, const GeneticCode&
{
for (size_t i = 0; i < site.size(); i++)
{
- if (!(gc.isFourFoldDegenerated(site.getValue(i))))
+ if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
{
return false;
}
diff --git a/src/Bpp/Seq/CodonSiteTools.h b/src/Bpp/Seq/CodonSiteTools.h
index bb7cd2a..51d0857 100644
--- a/src/Bpp/Seq/CodonSiteTools.h
+++ b/src/Bpp/Seq/CodonSiteTools.h
@@ -70,17 +70,19 @@ class CodonSiteTools:
* @brief Method to know if a codon site contains gap(s) or stop codons.
*
* @param site a Site
+ * @param gCode The genetic code according to which stop codons are specified.
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
*/
- static bool hasGapOrStop(const Site & site) throw (AlphabetException);
+ static bool hasGapOrStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException);
/**
* @brief Method to know if a codon site contains stop codon or not
*
* @param site a Site
+ * @param gCode The genetic code according to which stop codons are specified.
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
*/
- static bool hasStop(const Site & site) throw (AlphabetException);
+ static bool hasStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException);
/**
* @brief Method to know if a polymorphic codon site is polymorphic at only one site
@@ -89,18 +91,18 @@ class CodonSiteTools:
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw EmptySiteException If the site has size 0.
*/
- static bool isMonoSitePolymorphic(const Site & site) throw (AlphabetException, EmptySiteException);
+ static bool isMonoSitePolymorphic(const Site& site) throw (AlphabetException, EmptySiteException);
/**
* @brief Method to know if polymorphism at a codon site is synonymous
*
* @param site a Site
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet of the site does not match the codon alphabet of the genetic code.
* @throw EmptySiteException If the site has size 0.
*/
- static bool isSynonymousPolymorphic(const Site & site, const GeneticCode & gc)
+ static bool isSynonymousPolymorphic(const Site& site, const GeneticCode& gCode)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
/**
@@ -112,12 +114,13 @@ class CodonSiteTools:
* For an application, see for example (Fay et al. 2001, Genetics 158 pp 1227-1234).
*
* @param site a Site
+ * @param gCode The genetic code according to which stop codons are specified.
* @param freqmin a double; alleles with a frequency strictly lower than freqmin are replaced
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw EmptySiteException If the site has size 0.
*
*/
- static Site * generateCodonSiteWithoutRareVariant(const Site & site, double freqmin)
+ static Site* generateCodonSiteWithoutRareVariant(const Site& site, const GeneticCode& gCode, double freqmin)
throw(AlphabetException, EmptySiteException);
@@ -128,7 +131,7 @@ class CodonSiteTools:
* @param j a int
* @param ca a CodonAlphabet
*/
- static size_t numberOfDifferences(int i, int j, const CodonAlphabet & ca);
+ static size_t numberOfDifferences(int i, int j, const CodonAlphabet& ca);
/**
* @brief Compute the number of synonymous differences between two codons
@@ -139,10 +142,10 @@ class CodonSiteTools:
* Paths that include stop codons are excluded.
* @param i a int
* @param j a int
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param minchange a boolean set by default to false
*/
- static double numberOfSynonymousDifferences(int i, int j, const GeneticCode & gc, bool minchange=false);
+ static double numberOfSynonymousDifferences(int i, int j, const GeneticCode& gCode, bool minchange=false);
/**
* @brief Compute the synonymous pi per codon site
@@ -159,13 +162,13 @@ class CodonSiteTools:
* If minchange = true the path with the minimum number of non-synonymous change is chosen.
*
* @param site a Site
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param minchange a boolean set by default to false
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet of the site does not match the codon alphabet of the genetic code.
* @throw EmptySiteException If the site has size 0.
*/
- static double piSynonymous(const Site & site, const GeneticCode & gc, bool minchange=false)
+ static double piSynonymous(const Site& site, const GeneticCode& gCode, bool minchange = false)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
/**
@@ -182,13 +185,13 @@ class CodonSiteTools:
* If minchange = true the path with the minimum number of non-synonymous change is chosen.
*
* @param site a Site
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param minchange a boolean set by default to false
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet of the site does not match the codon alphabet of the genetic code.
* @throw EmptySiteException If the site has size 0.
*/
- static double piNonSynonymous(const Site & site, const GeneticCode & gc, bool minchange = false)
+ static double piNonSynonymous(const Site& site, const GeneticCode& gCode, bool minchange = false)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
@@ -201,10 +204,10 @@ class CodonSiteTools:
* Unresolved codons and stop codons will return a value of 0.
*
* @param i a int
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param ratio a double set by default to 1
*/
- static double numberOfSynonymousPositions(int i, const GeneticCode & gc, double ratio=1.0) throw (Exception);
+ static double numberOfSynonymousPositions(int i, const GeneticCode& gCode, double ratio = 1.0) throw (Exception);
/**
* @brief Return the mean number of synonymous positions per codon site
@@ -216,13 +219,13 @@ class CodonSiteTools:
* Unresolved and stop codons are counted as 0.
*
* @param site a Site
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param ratio a double Set by default to 1
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet of the site does not match the codon alphabet of the genetic code.
* @throw EmptySiteException If the site has size 0.
*/
- static double meanNumberOfSynonymousPositions(const Site & site, const GeneticCode & gc, double ratio=1)
+ static double meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gCode, double ratio = 1)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
/**
@@ -246,11 +249,12 @@ class CodonSiteTools:
* Rare variants (<= freqmin) can be excluded.
*
* @param site a Site
+ * @param gCode a GeneticCode
* @param freqmin a double To exclude snp in frequency strictly lower than freqmin (by default freqmin = 0)
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw EmptySiteException If the site has size 0.
*/
- static size_t numberOfSubsitutions(const Site & site, double freqmin = 0.)
+ static size_t numberOfSubsitutions(const Site& site, const GeneticCode& gCode, double freqmin = 0.)
throw(AlphabetException, EmptySiteException);
/**
@@ -264,16 +268,15 @@ class CodonSiteTools:
* Otherwise, a non-integer number could be return.
*
* @param site a Site
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @param freqmin a double To exclude snp in frequency strictly lower than freqmin (by default freqmin = 0).
* @throw AlphabetException If the alphabet associated to the site is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet of the site do not match the codon alphabet of the genetic code.
* @throw EmptySiteException If the site has size 0.
*/
- static size_t numberOfNonSynonymousSubstitutions(const Site & site, const GeneticCode & gc, double freqmin = 0.)
+ static size_t numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gCode, double freqmin = 0.)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
-
/**
* @brief Return a vector with the number of fixed synonymous and non-synonymous differences per codon site
*
@@ -305,23 +308,22 @@ class CodonSiteTools:
* @param siteOut a Site
* @param i an integer
* @param j an integer
- * @param gc a GeneticCode
+ * @param gCode a GeneticCode
* @throw AlphabetException If the alphabet associated to one of the sites is not a codon alphabet.
* @throw AlphabetMismatchException If the codon alphabet each the site do not match the codon alphabet of the genetic code.
* @throw EmptySiteException If one of the sites has size 0.
*/
- static std::vector<size_t> fixedDifferences(const Site & siteIn, const Site & siteOut, int i, int j, const GeneticCode & gc)
+ static std::vector<size_t> fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gCode)
throw (AlphabetException, AlphabetMismatchException, EmptySiteException);
-
/**
* @return True if all sequences have a fourfold degenerated codon in the site
* (that is, if a mutation in the fourth position does not change the aminoacid).
* @author Benoit Nabholz, Annabelle Haudry
* @param site The site to analyze.
- * @param gc The genetic code to use.
+ * @param gCode The genetic code to use.
*/
- static bool isFourFoldDegenerated(const Site& site, const GeneticCode& gc);
+ static bool isFourFoldDegenerated(const Site& site, const GeneticCode& gCode);
};
diff --git a/src/Bpp/Seq/Container/AlignedSequenceContainer.cpp b/src/Bpp/Seq/Container/AlignedSequenceContainer.cpp
index 5ae8aa4..64b4275 100644
--- a/src/Bpp/Seq/Container/AlignedSequenceContainer.cpp
+++ b/src/Bpp/Seq/Container/AlignedSequenceContainer.cpp
@@ -203,13 +203,13 @@ Site* AlignedSequenceContainer::removeSite(size_t pos) throw (IndexOutOfBoundsEx
}
// Delete site's position
- positions_.erase(positions_.begin() + pos);
+ positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
length_--;
// Actualizes the 'sites' vector:
if (sites_[pos])
delete sites_[pos];
- sites_.erase(sites_.begin() + pos);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(pos));
// Send result
return old;
@@ -229,13 +229,13 @@ void AlignedSequenceContainer::deleteSite(size_t pos) throw (IndexOutOfBoundsExc
}
// Delete site's position
- positions_.erase(positions_.begin() + pos);
+ positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
length_--;
// Actualizes the 'sites' vector:
if (sites_[pos])
delete sites_[pos];
- sites_.erase(sites_.begin() + pos);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(pos));
}
/******************************************************************************/
@@ -252,7 +252,8 @@ void AlignedSequenceContainer::deleteSites(size_t siteIndex, size_t length) thro
}
// Delete site's siteIndexition
- positions_.erase(positions_.begin() + siteIndex, positions_.begin() + siteIndex + length);
+ positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(siteIndex),
+ positions_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
length_ -= length;
// Actualizes the 'sites' vector:
@@ -261,7 +262,8 @@ void AlignedSequenceContainer::deleteSites(size_t siteIndex, size_t length) thro
if (sites_[i])
delete sites_[i];
}
- sites_.erase(sites_.begin() + siteIndex, sites_.begin() + siteIndex + length);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(siteIndex),
+ sites_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
}
/******************************************************************************/
@@ -381,10 +383,10 @@ void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, bool
}
length_++;
- positions_.insert(positions_.begin() + siteIndex, position);
+ positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
// Actualizes the 'sites' vector:
- sites_.insert(sites_.begin() + siteIndex, 0);
+ sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), 0);
}
/******************************************************************************/
@@ -422,10 +424,10 @@ void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, int p
}
length_++;
- positions_.insert(positions_.begin() + siteIndex, position);
+ positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
// Actualizes the 'sites' vector:
- sites_.insert(sites_.begin() + siteIndex, 0);
+ sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), 0);
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Container/AlignedSequenceContainer.h b/src/Bpp/Seq/Container/AlignedSequenceContainer.h
index 57a2f05..10824ba 100644
--- a/src/Bpp/Seq/Container/AlignedSequenceContainer.h
+++ b/src/Bpp/Seq/Container/AlignedSequenceContainer.h
@@ -100,6 +100,7 @@ class AlignedSequenceContainer:
{
reindexSites();
}
+
/**
* @brief Copy constructor.
*
diff --git a/src/Bpp/Seq/Container/CompressedVectorSiteContainer.cpp b/src/Bpp/Seq/Container/CompressedVectorSiteContainer.cpp
index 4c8cb67..427ed8c 100644
--- a/src/Bpp/Seq/Container/CompressedVectorSiteContainer.cpp
+++ b/src/Bpp/Seq/Container/CompressedVectorSiteContainer.cpp
@@ -91,7 +91,7 @@ CompressedVectorSiteContainer::CompressedVectorSiteContainer(size_t size, const
// Seq names and comments:
for (size_t i = 0; i < size; i++)
{
- names_[i] = "Seq_" + i;
+ names_[i] = "Seq_" + TextTools::toString(i);
comments_[i] = new Comments();
}
}
@@ -272,7 +272,7 @@ void CompressedVectorSiteContainer::setSite(size_t pos, const Site& site, bool c
{
//There was no other site pointing toward this pattern, so we remove it.
delete sites_[current];
- sites_.erase(sites_.begin() + current);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(current));
//Now we have to correct all indices:
for (size_t i = 0; i < index_.size(); ++i)
{
@@ -343,14 +343,14 @@ void CompressedVectorSiteContainer::deleteSite(size_t siteIndex) throw (IndexOut
{
//There was no other site pointing toward this pattern, so we remove it.
delete sites_[current];
- sites_.erase(sites_.begin() + current);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(current));
//Now we have to correct all indices:
for (size_t j = 0; j < index_.size(); ++j)
{
if (index_[j] > current) index_[j]--;
}
}
- index_.erase(index_.begin() + siteIndex);
+ index_.erase(index_.begin() + static_cast<ptrdiff_t>(siteIndex));
}
/******************************************************************************/
@@ -408,7 +408,7 @@ void CompressedVectorSiteContainer::addSite(const Site& site, size_t siteIndex,
Site* copy = dynamic_cast<Site*>(site.clone());
sites_.push_back(copy);
}
- index_.insert(index_.begin() + siteIndex, index);
+ index_.insert(index_.begin() + static_cast<ptrdiff_t>(siteIndex), index);
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Container/MapSequenceContainer.cpp b/src/Bpp/Seq/Container/MapSequenceContainer.cpp
index a79340e..a601853 100644
--- a/src/Bpp/Seq/Container/MapSequenceContainer.cpp
+++ b/src/Bpp/Seq/Container/MapSequenceContainer.cpp
@@ -7,7 +7,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -164,7 +164,7 @@ size_t MapSequenceContainer::getSequencePosition(const string& name)
const throw (SequenceNotFoundException)
{
// Specified sequence name research into all sequences
- int pos = 0;
+ size_t pos = 0;
for (map<string, Sequence*>::const_iterator it = sequences_.begin(); it != sequences_.end(); it++)
{
if (it->second->getName() == name) return pos;
diff --git a/src/Bpp/Seq/Container/OrderedSequenceContainer.h b/src/Bpp/Seq/Container/OrderedSequenceContainer.h
index 95903a4..c7a0ffe 100644
--- a/src/Bpp/Seq/Container/OrderedSequenceContainer.h
+++ b/src/Bpp/Seq/Container/OrderedSequenceContainer.h
@@ -168,6 +168,10 @@ class OrderedSequenceContainer:
* @{
*/
+ virtual int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException) = 0;
+
+ virtual const int& valueAt(const std::string& sequenceName, size_t elementIndex) const throw (SequenceNotFoundException, IndexOutOfBoundsException) = 0;
+
/**
* @brief Element access operator.
*
diff --git a/src/Bpp/Seq/Container/SequenceContainerIterator.cpp b/src/Bpp/Seq/Container/SequenceContainerIterator.cpp
index 140fa3a..977949d 100644
--- a/src/Bpp/Seq/Container/SequenceContainerIterator.cpp
+++ b/src/Bpp/Seq/Container/SequenceContainerIterator.cpp
@@ -67,7 +67,7 @@ const Sequence* SimpleSequenceContainerIterator::nextSequence()
bool SimpleSequenceContainerIterator::hasMoreSequences() const
{
- return currentPosition_ < static_cast<int>(sequences_->getNumberOfSequences());
+ return currentPosition_ < sequences_->getNumberOfSequences();
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Container/SequenceContainerIterator.h b/src/Bpp/Seq/Container/SequenceContainerIterator.h
index fc10feb..a4e227d 100644
--- a/src/Bpp/Seq/Container/SequenceContainerIterator.h
+++ b/src/Bpp/Seq/Container/SequenceContainerIterator.h
@@ -55,7 +55,7 @@ class AbstractSequenceContainerIterator :
{
protected:
const OrderedSequenceContainer* sequences_;
- int currentPosition_;
+ size_t currentPosition_;
public:
AbstractSequenceContainerIterator(const OrderedSequenceContainer& sites);
diff --git a/src/Bpp/Seq/Container/SequenceContainerTools.cpp b/src/Bpp/Seq/Container/SequenceContainerTools.cpp
index 72c2d9c..f0cb500 100644
--- a/src/Bpp/Seq/Container/SequenceContainerTools.cpp
+++ b/src/Bpp/Seq/Container/SequenceContainerTools.cpp
@@ -41,6 +41,9 @@
#include "VectorSequenceContainer.h"
#include "../Alphabet/CodonAlphabet.h"
+// From bpp-core:
+#include <Bpp/Text/TextTools.h>
+
using namespace bpp;
// From the STL:
@@ -55,7 +58,7 @@ SequenceContainer* SequenceContainerTools::createContainerOfSpecifiedSize(const
VectorSequenceContainer* vsc = new VectorSequenceContainer(alphabet);
for (size_t i = 0; i < size; ++i)
{
- vsc->addSequence(BasicSequence("" + i, "", alphabet), false);
+ vsc->addSequence(BasicSequence(TextTools::toString(i), "", alphabet), false);
}
return vsc;
}
diff --git a/src/Bpp/Seq/Container/SequenceContainerTools.h b/src/Bpp/Seq/Container/SequenceContainerTools.h
index 7c79514..def3050 100644
--- a/src/Bpp/Seq/Container/SequenceContainerTools.h
+++ b/src/Bpp/Seq/Container/SequenceContainerTools.h
@@ -53,8 +53,8 @@ knowledge of the CeCILL license and that you accept its terms.
namespace bpp
{
-typedef std::vector<size_t> SequenceSelection;
-
+ typedef std::vector<size_t> SequenceSelection;
+ typedef std::vector<size_t> SiteSelection;
/**
* @brief Utilitary methods dealing with sequence containers.
*/
diff --git a/src/Bpp/Seq/Container/SiteContainerIterator.cpp b/src/Bpp/Seq/Container/SiteContainerIterator.cpp
index fec06e7..a5d5039 100644
--- a/src/Bpp/Seq/Container/SiteContainerIterator.cpp
+++ b/src/Bpp/Seq/Container/SiteContainerIterator.cpp
@@ -60,7 +60,7 @@ SimpleSiteContainerIterator::SimpleSiteContainerIterator(const SiteContainer& si
const Site* SimpleSiteContainerIterator::nextSite()
{
- const Site* s = &sites_->getSite(currentPosition_);
+ const Site* s = &sites_->getSite(static_cast<size_t>(currentPosition_));
currentPosition_++;
return s;
}
@@ -79,7 +79,7 @@ NoGapSiteContainerIterator::NoGapSiteContainerIterator(const SiteContainer& site
const Site* NoGapSiteContainerIterator::nextSite()
{
- const Site* s = &sites_->getSite(currentPosition_);
+ const Site* s = &sites_->getSite(static_cast<size_t>(currentPosition_));
currentPosition_ = nextSiteWithoutGapPosition(currentPosition_);
return s;
}
@@ -91,17 +91,17 @@ bool NoGapSiteContainerIterator::hasMoreSites() const
int NoGapSiteContainerIterator::nextSiteWithoutGapPosition(int current) const
{
- unsigned int position = current + 1;
+ size_t position = static_cast<size_t>(current + 1);
while (position < sites_->getNumberOfSites() && SiteTools::hasGap(sites_->getSite(position)))
position++;
- return position;
+ return static_cast<int>(position);
}
int NoGapSiteContainerIterator::previousSiteWithoutGapPosition(int current) const
{
int position = current - 1;
- while (position >= 0 && SiteTools::hasGap(sites_->getSite(position)))
- position --;
+ while (position >= 0 && SiteTools::hasGap(sites_->getSite(static_cast<size_t>(position))))
+ position--;
return position;
}
@@ -114,7 +114,7 @@ CompleteSiteContainerIterator::CompleteSiteContainerIterator(const SiteContainer
const Site* CompleteSiteContainerIterator::nextSite()
{
- const Site* s = &sites_->getSite(currentPosition_);
+ const Site* s = &sites_->getSite(static_cast<size_t>(currentPosition_));
currentPosition_ = nextCompleteSitePosition(currentPosition_);
return s;
}
@@ -126,16 +126,16 @@ bool CompleteSiteContainerIterator::hasMoreSites() const
int CompleteSiteContainerIterator::nextCompleteSitePosition(int current) const
{
- unsigned int position = current + 1;
+ size_t position = static_cast<size_t>(current + 1);
while (position < sites_->getNumberOfSites() && !SiteTools::isComplete(sites_->getSite(position)))
- position ++;
- return position;
+ position++;
+ return static_cast<int>(position);
}
int CompleteSiteContainerIterator::previousCompleteSitePosition(int current) const
{
int position = current - 1;
- while (position >= 0 && !SiteTools::isComplete(sites_->getSite(position)))
+ while (position >= 0 && !SiteTools::isComplete(sites_->getSite(static_cast<size_t>(position))))
position --;
return position;
}
diff --git a/src/Bpp/Seq/Container/SiteContainerTools.cpp b/src/Bpp/Seq/Container/SiteContainerTools.cpp
index 386cb3a..afc51c3 100644
--- a/src/Bpp/Seq/Container/SiteContainerTools.cpp
+++ b/src/Bpp/Seq/Container/SiteContainerTools.cpp
@@ -43,6 +43,7 @@
#include "VectorSiteContainer.h"
#include "SiteContainerIterator.h"
#include "../SiteTools.h"
+#include "../CodonSiteTools.h"
#include "../Alphabet/AlphabetTools.h"
#include "../SequenceTools.h"
#include <Bpp/App/ApplicationTools.h>
@@ -106,8 +107,39 @@ SiteContainer* SiteContainerTools::getSelectedSites(
}
/******************************************************************************/
+SiteContainer* SiteContainerTools::getSelectedPositions(
+ const SiteContainer& sequences,
+ const SiteSelection& selection)
+{
+ size_t wsize = sequences.getAlphabet()->getStateCodingSize();
+ if (wsize > 1)
+ {
+ if (selection.size() % wsize != 0)
+ throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
+ SiteSelection selection2;
+ for (size_t i = 0; i < selection.size(); i += wsize)
+ {
+ if (selection[i] % wsize != 0)
+ throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
-const Sequence* SiteContainerTools::getConsensus(const SiteContainer& sc, const std::string& name, bool ignoreGap, bool resolveUnknown)
+ for (size_t j = 1; j < wsize; ++j)
+ {
+ if (selection[i + j] != (selection[i + j - 1] + 1))
+ throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
+ }
+ selection2.push_back(selection[i] / wsize);
+ }
+ return getSelectedSites(sequences, selection2);
+ }
+ else
+ {
+ return getSelectedSites(sequences, selection);
+ }
+}
+
+/******************************************************************************/
+
+Sequence* SiteContainerTools::getConsensus(const SiteContainer& sc, const std::string& name, bool ignoreGap, bool resolveUnknown)
{
Vint consensus;
SimpleSiteContainerIterator ssi(sc);
@@ -143,7 +175,7 @@ const Sequence* SiteContainerTools::getConsensus(const SiteContainer& sc, const
}
consensus.push_back(cons);
}
- const Sequence* seqConsensus = new BasicSequence(name, consensus, sc.getAlphabet());
+ Sequence* seqConsensus = new BasicSequence(name, consensus, sc.getAlphabet());
return seqConsensus;
}
@@ -305,7 +337,7 @@ void SiteContainerTools::removeGapSites(SiteContainer& sites, double maxFreqGaps
/******************************************************************************/
-SiteContainer* SiteContainerTools::removeStopCodonSites(const SiteContainer& sites) throw (AlphabetException)
+SiteContainer* SiteContainerTools::removeStopCodonSites(const SiteContainer& sites, const GeneticCode& gCode) throw (AlphabetException)
{
const CodonAlphabet* pca = dynamic_cast<const CodonAlphabet*>(sites.getAlphabet());
if (!pca)
@@ -313,10 +345,10 @@ SiteContainer* SiteContainerTools::removeStopCodonSites(const SiteContainer& sit
vector<string> seqNames = sites.getSequencesNames();
VectorSiteContainer* noStopCont = new VectorSiteContainer(seqNames.size(), sites.getAlphabet());
noStopCont->setSequencesNames(seqNames, false);
- for (unsigned int i = 0; i < sites.getNumberOfSites(); i++)
+ for (size_t i = 0; i < sites.getNumberOfSites(); i++)
{
const Site* site = &sites.getSite(i);
- if (!SiteTools::hasStopCodon(*site))
+ if (!CodonSiteTools::hasStop(*site, gCode))
noStopCont->addSite(*site, false);
}
return noStopCont;
diff --git a/src/Bpp/Seq/Container/SiteContainerTools.h b/src/Bpp/Seq/Container/SiteContainerTools.h
index 3f3f391..4150196 100644
--- a/src/Bpp/Seq/Container/SiteContainerTools.h
+++ b/src/Bpp/Seq/Container/SiteContainerTools.h
@@ -8,43 +8,44 @@
#define _SITECONTAINERTOOLS_H_
/*
- Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
- This software is a computer program whose purpose is to provide classes
- for sequences analysis.
-
- This software is governed by the CeCILL license under French law and
- abiding by the rules of distribution of free software. You can use,
- modify and/ or redistribute the software under the terms of the CeCILL
- license as circulated by CEA, CNRS and INRIA at the following URL
- "http://www.cecill.info".
-
- As a counterpart to the access to the source code and rights to copy,
- modify and redistribute granted by the license, users are provided only
- with a limited warranty and the software's author, the holder of the
- economic rights, and the successive licensors have only limited
- liability.
-
- In this respect, the user's attention is drawn to the risks associated
- with loading, using, modifying and/or developing or reproducing the
- software by the user in light of its specific status of free software,
- that may mean that it is complicated to manipulate, and that also
- therefore means that it is reserved for developers and experienced
- professionals having in-depth computer knowledge. Users are therefore
- encouraged to load and test the software's suitability as regards their
- requirements in conditions enabling the security of their systems and/or
- data to be ensured and, more generally, to use and operate it in the
- same conditions as regards security.
-
- The fact that you are presently reading this means that you have had
- knowledge of the CeCILL license and that you accept its terms.
- */
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
+*/
#include "SiteContainer.h"
#include "VectorSiteContainer.h"
#include "AlignedSequenceContainer.h"
#include "../AlphabetIndex/AlphabetIndex2.h"
#include "../DistanceMatrix.h"
+#include "../GeneticCode/GeneticCode.h"
#include <Bpp/Numeric/Matrix/Matrix.h>
//From the STL:
@@ -54,95 +55,95 @@
namespace bpp
{
-typedef std::vector<size_t> SiteSelection;
+ typedef std::vector<size_t> SiteSelection;
/**
* @brief Some utililitary methods to deal with site containers.
*/
-class SiteContainerTools
-{
- public:
- SiteContainerTools() {}
- virtual ~SiteContainerTools() {}
-
- public:
-
- /**
- * @brief Retrieves sites without gaps from SiteContainer.
- *
- * This function build a new SiteContainer instance with only sites without gaps.
- * The container passed as input is not modified, all sites are copied.
- *
- * @param sites The container to analyse.
- * @return A pointer toward a new SiteContainer with only sites with no gaps.
- */
- static SiteContainer* getSitesWithoutGaps(const SiteContainer& sites);
-
- /**
- * @brief Retrieves complete sites from SiteContainer.
- *
- * This function build a new SiteContainer instance with only complete sites,
- * i.e. site with fully resolved states (no gap, no unknown caracters).
- * The container passed as input is not modified, all sites are copied.
- *
- * @param sites The container to analyse.
- * @return A pointer toward a new SiteContainer with only complete sites.
- */
- static SiteContainer* getCompleteSites(const SiteContainer& sites);
-
- /**
- * @brief Get a site set without gap-only sites.
- *
- * This function build a new SiteContainer instance without sites with only gaps.
- * The container passed as input is not modified, all sites are copied.
- *
+ class SiteContainerTools
+ {
+ public:
+ SiteContainerTools() {}
+ virtual ~SiteContainerTools() {}
+
+ public:
+
+ /**
+ * @brief Retrieves sites without gaps from SiteContainer.
+ *
+ * This function builds a new SiteContainer instance with only sites without gaps.
+ * The container passed as input is not modified, all sites are copied.
+ *
+ * @param sites The container to analyse.
+ * @return A pointer toward a new SiteContainer with only sites with no gaps.
+ */
+ static SiteContainer* getSitesWithoutGaps(const SiteContainer& sites);
+
+ /**
+ * @brief Retrieves complete sites from SiteContainer.
+ *
+ * This function builds a new SiteContainer instance with only complete sites,
+ * i.e. sites with fully resolved states (no gap, no unknown characters).
+ * The container passed as input is not modified, all sites are copied.
+ *
+ * @param sites The container to analyse.
+ * @return A pointer toward a new SiteContainer with only complete sites.
+ */
+ static SiteContainer* getCompleteSites(const SiteContainer& sites);
+
+ /**
+ * @brief Get a site set without gap-only sites.
+ *
+ * This function builds a new SiteContainer instance without sites with only gaps.
+ * The container passed as input is not modified, all sites are copied.
+ *
* @see removeGapOnlySites(SiteContainer& sites)
- * @param sites The container to analyse.
- * @return A pointer toward a new SiteContainer.
- */
- static SiteContainer* removeGapOnlySites(const SiteContainer& sites);
+ * @param sites The container to analyse.
+ * @return A pointer toward a new SiteContainer.
+ */
+ static SiteContainer* removeGapOnlySites(const SiteContainer& sites);
/**
- * @brief Remove gap-only sites from a site set.
- *
- * @param sites The container where the sites have to be removed.
- */
- static void removeGapOnlySites(SiteContainer& sites);
-
- /**
- * @brief Get a site set without gap/unresolved-only sites.
- *
- * This function build a new SiteContainer instance without sites with only gaps or unresolved characters.
- * The container passed as input is not modified, all sites are copied.
- *
- * @param sites The container to analyse.
- * @return A pointer toward a new SiteContainer.
- */
- static SiteContainer* removeGapOrUnresolvedOnlySites(const SiteContainer& sites);
+ * @brief Remove gap-only sites from a site set.
+ *
+ * @param sites The container where the sites have to be removed.
+ */
+ static void removeGapOnlySites(SiteContainer& sites);
+
+ /**
+ * @brief Get a site set without gap/unresolved-only sites.
+ *
+ * This function builds a new SiteContainer instance without sites with only gaps or unresolved characters.
+ * The container passed as input is not modified, all sites are copied.
+ *
+ * @param sites The container to analyse.
+ * @return A pointer toward a new SiteContainer.
+ */
+ static SiteContainer* removeGapOrUnresolvedOnlySites(const SiteContainer& sites);
/**
- * @brief Remove gap/unresolved-only sites from a site set.
- *
- * @param sites The container where the sites have to be removed.
- */
+ * @brief Remove gap/unresolved-only sites from a site set.
+ *
+ * @param sites The container where the sites have to be removed.
+ */
static void removeGapOrUnresolvedOnlySites(SiteContainer& sites);
/**
- * @brief Get a siteset with sites with less than a given amount of gaps.
- *
- * @param sites The container from which the sites have to be removed.
+ * @brief Get a siteset with sites with less than a given amount of gaps.
+ *
+ * @param sites The container from which the sites have to be removed.
* @param maxFreqGaps The maximum frequency of gaps in each site.
- * @return A pointer toward a new SiteContainer.
- */
- static SiteContainer* removeGapSites(const SiteContainer& sites, double maxFreqGaps);
+ * @return A pointer toward a new SiteContainer.
+ */
+ static SiteContainer* removeGapSites(const SiteContainer& sites, double maxFreqGaps);
/**
- * @brief Remove sites with a given amount of gaps.
- *
- * @param sites The container from which the sites have to be removed.
+ * @brief Remove sites with a given amount of gaps.
+ *
+ * @param sites The container from which the sites have to be removed.
* @param maxFreqGaps The maximum frequency of gaps in each site.
- */
- static void removeGapSites(SiteContainer& sites, double maxFreqGaps);
+ */
+ static void removeGapSites(SiteContainer& sites, double maxFreqGaps);
/**
* @brief Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
@@ -151,27 +152,49 @@ class SiteContainerTools
* The container passed as input is not modified, all sites are copied.
*
* @param sites The container to analyse.
+ * @param gCode the genetic code to use to determine stop codons.
* @return A pointer toward a new SiteContainer.
*/
- static SiteContainer* removeStopCodonSites(const SiteContainer& sites) throw (AlphabetException);
+ static SiteContainer* removeStopCodonSites(const SiteContainer& sites, const GeneticCode& gCode) throw (AlphabetException);
+
+ /**
+ * @brief Create a new container with a specified set of sites.
+ *
+ * A new VectorSiteContainer is created with specified sites.
+ * The destruction of the container is up to the user.
+ * Sites are specified by their index, beginning at 0.
+ * No position verification is performed, based on the assumption that
+ * the container passed as an argument is a correct one.
+ * Redundant selection is not checked, so be careful with what you're doing!
+ *
+ * @param sequences The container from which sequences are to be taken.
+ * @param selection The positions of all sites to retrieve.
+ * @return A new container with all selected sites.
+ */
+ static SiteContainer* getSelectedSites(const SiteContainer& sequences, const SiteSelection& selection);
+
+ /**
+ * @brief Create a new container with a specified set of positions.
+ *
+ * A new VectorSiteContainer is created with the specified positions.
+ * The destruction of the container is up to the user.
+ *
+ * Positions are specified by their indices, beginning at 0, and
+ * are converted to site positions given the length of the words
+ * of the alphabet.
+ *
+ * Only compatibility with the alphabet word size is checked (an IOException is
+ * thrown otherwise); positions are assumed to be valid for the given container.
+ * Redundant selection is not checked, so be careful with what you're doing!
+ *
+ * @param sequences The container from which sequences are to be taken.
+ * @param selection The positions to retrieve.
+ * @return A new container with all selected sites.
+ */
+
+ static SiteContainer* getSelectedPositions(const SiteContainer& sequences, const SiteSelection& selection);
/**
- * @brief Create a new container with a specified set of sites.
- *
- * A new VectorSiteContainer is created with specified sites.
- * The destruction of the container is up to the user.
- * Sites are specified by their indice, beginning at 0.
- * No position verification is performed, based on the assumption that
- * the container passed as an argument is a correct one.
- * Redundant selection is not checked, so be careful with what you're doing!
- *
- * @param sequences The container from wich sequences are to be taken.
- * @param selection The positions of all sites to retrieve.
- * @return A new container with all selected sites.
- */
- static SiteContainer* getSelectedSites(const SiteContainer& sequences, const SiteSelection& selection);
-
- /**
* @brief create the consensus sequence of the alignment.
*
* In case of ambiguity (for instance a AATT site), one state will be chosen arbitrarily.
@@ -183,7 +206,7 @@ class SiteContainerTools
* If this option is set to true, a consensus sequence will never contain an unknown character.
* @return A new Sequence object with the consensus sequence.
*/
- static const Sequence* getConsensus(const SiteContainer & sc, const std::string& name = "consensus", bool ignoreGap = true, bool resolveUnknown = false);
+ static Sequence* getConsensus(const SiteContainer& sc, const std::string& name = "consensus", bool ignoreGap = true, bool resolveUnknown = false);
/**
* @brief Change all gaps to unknown state in a container, according to its alphabet.
@@ -466,7 +489,7 @@ class SiteContainerTools
* @author Julien Dutheil
*/
static std::vector<double> getSumOfPairsScores(const Matrix<size_t>& positions1, const Matrix<size_t>& positions2, double na = 0);
-};
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Container/VectorSequenceContainer.cpp b/src/Bpp/Seq/Container/VectorSequenceContainer.cpp
index 61085d5..f6cb50e 100644
--- a/src/Bpp/Seq/Container/VectorSequenceContainer.cpp
+++ b/src/Bpp/Seq/Container/VectorSequenceContainer.cpp
@@ -6,7 +6,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -262,7 +262,7 @@ Sequence* VectorSequenceContainer::removeSequence(size_t sequenceIndex) throw (I
throw IndexOutOfBoundsException("VectorSequenceContainer::removeSequence.", sequenceIndex, 0, sequences_.size() - 1);
Sequence* old = sequences_[sequenceIndex];
// Remove pointer toward old sequence:
- sequences_.erase(sequences_.begin() + sequenceIndex);
+ sequences_.erase(sequences_.begin() + static_cast<ptrdiff_t>(sequenceIndex));
// Send copy:
return old;
}
@@ -276,7 +276,7 @@ void VectorSequenceContainer::deleteSequence(size_t sequenceIndex) throw (IndexO
throw IndexOutOfBoundsException("VectorSequenceContainer::deleteSequence.", sequenceIndex, 0, sequences_.size() - 1);
delete sequences_[sequenceIndex];
// Remove pointer toward old sequence:
- sequences_.erase(sequences_.begin() + sequenceIndex);
+ sequences_.erase(sequences_.begin() + static_cast<ptrdiff_t>(sequenceIndex));
}
/******************************************************************************/
@@ -321,7 +321,7 @@ void VectorSequenceContainer::addSequence(const Sequence& sequence, size_t seque
if (sequence.getAlphabet()->getAlphabetType() == getAlphabet()->getAlphabetType())
{
// insert(begin() + pos, new Sequence(sequence.getName(), sequence.getContent(), alphabet));
- sequences_.insert(sequences_.begin() + sequenceIndex, dynamic_cast<Sequence*>(sequence.clone()));
+ sequences_.insert(sequences_.begin() + static_cast<ptrdiff_t>(sequenceIndex), dynamic_cast<Sequence*>(sequence.clone()));
}
else
throw AlphabetMismatchException("VectorSequenceContainer::addSequence : Alphabets don't match", getAlphabet(), sequence.getAlphabet());
diff --git a/src/Bpp/Seq/Container/VectorSequenceContainer.h b/src/Bpp/Seq/Container/VectorSequenceContainer.h
index c320c98..fc467f6 100644
--- a/src/Bpp/Seq/Container/VectorSequenceContainer.h
+++ b/src/Bpp/Seq/Container/VectorSequenceContainer.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -63,131 +63,131 @@ namespace bpp
class VectorSequenceContainer:
public AbstractSequenceContainer
{
- private:
+ private:
- /**
- * @brief A std::vector of pointers toward the sequences stored in the container.
- */
- mutable std::vector<Sequence*> sequences_;
+ /**
+ * @brief A std::vector of pointers toward the sequences stored in the container.
+ */
+ mutable std::vector<Sequence*> sequences_;
- public:
-
- /**
- * @brief Build a new container from a std::vector of pointers toward sequence objects.
- *
- * The addSequence() method is called uppon each Sequence object, hence each sequence is
- * <i>copied</i> into the container.
- *
- * @param vs The std::vector of pointers toward sequence objects.
- * @param alpha The alphabet to all sequences.
- * @throw AlphabetMismatchException if one sequence does not match the specified alphabet.
- */
- VectorSequenceContainer(
- const std::vector<const Sequence*>& vs, const Alphabet* alpha)
- throw (AlphabetMismatchException);
-
- /**
- * @brief Build an empty container that will contain sequences of a particular alphabet.
- *
- * @param alpha The alphabet of the container.
- */
- VectorSequenceContainer(const Alphabet* alpha): AbstractSequenceContainer(alpha), sequences_() {}
-
- /**
- * @name Copy contructors:
- *
- * @{
- */
-
- /**
- * @brief Copy from a VectorSequenceContainer.
- *
- * @param vsc The VectorSequenceContainer to copy into this container.
- */
- VectorSequenceContainer(const VectorSequenceContainer& vsc);
-
- /**
- * @brief Copy from an OrderedSequenceContainer.
- *
- * @param osc The OrderedSequenceContainer to copy into this container.
- */
- VectorSequenceContainer(const OrderedSequenceContainer& osc);
-
- /**
- * @brief Copy from a SequenceContainer.
- *
- * @param osc The SequenceContainer to copy into this container.
- */
- VectorSequenceContainer(const SequenceContainer& osc);
-
- /** @} */
-
- /**
- * @brief Assign from a VectorSequenceContainer.
- *
- * @param vsc The VectorSequenceContainer to copy into this container.
- */
- VectorSequenceContainer& operator=(const VectorSequenceContainer& vsc);
-
- /**
- * @brief Copy from an OrderedSequenceContainer.
- *
- * @param osc The OrderedSequenceContainer to copy into this container.
- */
- VectorSequenceContainer& operator=(const OrderedSequenceContainer& osc);
-
- /**
- * @brief Copy from a SequenceContainer.
- *
- * @param osc The SequenceContainer to copy into this container.
- */
- VectorSequenceContainer& operator=(const SequenceContainer& osc);
+ public:
+
+ /**
+ * @brief Build a new container from a std::vector of pointers toward sequence objects.
+ *
+ * The addSequence() method is called upon each Sequence object, hence each sequence is
+ * <i>copied</i> into the container.
+ *
+ * @param vs The std::vector of pointers toward sequence objects.
+ * @param alpha The alphabet to all sequences.
+ * @throw AlphabetMismatchException if one sequence does not match the specified alphabet.
+ */
+ VectorSequenceContainer(
+ const std::vector<const Sequence*>& vs, const Alphabet* alpha)
+ throw (AlphabetMismatchException);
+
+ /**
+ * @brief Build an empty container that will contain sequences of a particular alphabet.
+ *
+ * @param alpha The alphabet of the container.
+ */
+ VectorSequenceContainer(const Alphabet* alpha): AbstractSequenceContainer(alpha), sequences_() {}
+
+ /**
+ * @name Copy constructors:
+ *
+ * @{
+ */
+
+ /**
+ * @brief Copy from a VectorSequenceContainer.
+ *
+ * @param vsc The VectorSequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer(const VectorSequenceContainer& vsc);
+
+ /**
+ * @brief Copy from an OrderedSequenceContainer.
+ *
+ * @param osc The OrderedSequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer(const OrderedSequenceContainer& osc);
+
+ /**
+ * @brief Copy from a SequenceContainer.
+ *
+ * @param osc The SequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer(const SequenceContainer& osc);
+
+ /** @} */
/**
- * @brief Container destructor: delete all sequences in the container.
- */
- virtual ~VectorSequenceContainer() { clear(); }
-
- public:
-
- /**
- * @name The Clonable interface.
- *
- * @{
- */
- Clonable* clone() const { return new VectorSequenceContainer(*this); }
- /** @} */
-
- /**
- * @name The SequenceContainer interface.
- *
- * @{
- */
- bool hasSequence(const std::string& name) const;
-
+ * @brief Assign from a VectorSequenceContainer.
+ *
+ * @param vsc The VectorSequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer& operator=(const VectorSequenceContainer& vsc);
+
+ /**
+ * @brief Copy from an OrderedSequenceContainer.
+ *
+ * @param osc The OrderedSequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer& operator=(const OrderedSequenceContainer& osc);
+
+ /**
+ * @brief Copy from a SequenceContainer.
+ *
+ * @param osc The SequenceContainer to copy into this container.
+ */
+ VectorSequenceContainer& operator=(const SequenceContainer& osc);
+
+ /**
+ * @brief Container destructor: delete all sequences in the container.
+ */
+ virtual ~VectorSequenceContainer() { clear(); }
+
+ public:
+
+ /**
+ * @name The Clonable interface.
+ *
+ * @{
+ */
+ Clonable* clone() const { return new VectorSequenceContainer(*this); }
+ /** @} */
+
+ /**
+ * @name The SequenceContainer interface.
+ *
+ * @{
+ */
+ bool hasSequence(const std::string& name) const;
+
const Sequence& getSequence(const std::string& name) const throw (SequenceNotFoundException);
- void setSequence(const std::string& name, const Sequence& sequence, bool checkName = true) throw (Exception)
+ void setSequence(const std::string& name, const Sequence& sequence, bool checkName = true) throw (Exception)
{
setSequence(getSequencePosition(name), sequence, checkName);
}
- Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException)
+ Sequence* removeSequence(const std::string& name) throw (SequenceNotFoundException)
{
return removeSequence(getSequencePosition(name));
}
- void deleteSequence(const std::string& name) throw (SequenceNotFoundException)
+ void deleteSequence(const std::string& name) throw (SequenceNotFoundException)
{
deleteSequence(getSequencePosition(name));
}
- size_t getNumberOfSequences() const { return sequences_.size(); }
+ size_t getNumberOfSequences() const { return sequences_.size(); }
- std::vector<std::string> getSequencesNames() const;
- void setSequencesNames(const std::vector<std::string>& names, bool checkNames = true) throw (Exception);
- void clear();
- VectorSequenceContainer * createEmptyContainer() const;
+ std::vector<std::string> getSequencesNames() const;
+ void setSequencesNames(const std::vector<std::string>& names, bool checkNames = true) throw (Exception);
+ void clear();
+ VectorSequenceContainer * createEmptyContainer() const;
int& valueAt(const std::string& sequenceName, size_t elementIndex) throw (SequenceNotFoundException, IndexOutOfBoundsException)
{
@@ -227,77 +227,77 @@ class VectorSequenceContainer:
{
return getSequence(sequenceIndex)[elementIndex];
}
- /** @} */
+ /** @} */
- /**
- * @name The OrderedSequenceContainer interface.
- *
- * @{
- */
- void setComments(const std::string & name, const Comments& comments) throw (SequenceNotFoundException)
+ /**
+ * @name The OrderedSequenceContainer interface.
+ *
+ * @{
+ */
+ void setComments(const std::string & name, const Comments& comments) throw (SequenceNotFoundException)
{
- AbstractSequenceContainer::setComments(name, comments);
- }
-
- void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException);
- size_t getSequencePosition(const std::string& name) const throw (SequenceNotFoundException);
- const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException);
- void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName = true) throw (Exception);
- Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
- void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
- /** @} */
-
- /**
- * @name Add sequence to this container.
- *
- * @{
- */
-
- /**
- * @brief Add a sequence at the end of the container.
- *
- * The sequence is copied into the container.
- * If checkNames is set to true, the method check if the name of the
- * sequence is already used in the container, and sends an exception if it
- * is the case. Otherwise, do not check the name: the method is hence faster,
- * but use it at your own risks!
- *
- * @param sequence The sequence to add.
- * @param checkName Tell if the method must check the name of the sequence
- * before adding it.
- * @throw Exception If the sequence couldn't be added to the container.
- */
- virtual void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception);
-
- /**
- * @brief Add a sequence to the container at a particular position.
- *
- * The sequence is copied into the container.
- * If checkName is set to true, the method check if the name of the
- * sequence is already used in the container, and sends an exception if it
- * is the case. Otherwise, do not check the name: the method is hence faster,
- * but use it at your own risks!
- *
- * @param sequence The sequence to add.
- * @param sequenceIndex The position where to insert the new sequence.
- * All the following sequences will be pushed.
- * @param checkName Tell if the method must check the name of the sequence
- * before adding it.
- * @throw Exception If the sequence couldn't be added to the container.
- */
- virtual void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception);
-
- protected:
-
- /**
- * @name AbstractSequenceContainer methods.
- *
- * @{
- */
- Sequence& getSequence_(size_t i) throw (IndexOutOfBoundsException);
- Sequence& getSequence_(const std::string& name) throw (SequenceNotFoundException);
- /** @} */
+ AbstractSequenceContainer::setComments(name, comments);
+ }
+
+ void setComments(size_t sequenceIndex, const Comments& comments) throw (IndexOutOfBoundsException);
+ size_t getSequencePosition(const std::string& name) const throw (SequenceNotFoundException);
+ const Sequence& getSequence(size_t sequenceIndex) const throw (IndexOutOfBoundsException);
+ void setSequence(size_t sequenceIndex, const Sequence& sequence, bool checkName = true) throw (Exception);
+ Sequence* removeSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
+ void deleteSequence(size_t sequenceIndex) throw (IndexOutOfBoundsException);
+ /** @} */
+
+ /**
+ * @name Add sequence to this container.
+ *
+ * @{
+ */
+
+ /**
+ * @brief Add a sequence at the end of the container.
+ *
+ * The sequence is copied into the container.
+ * If checkName is set to true, the method checks whether the name of the
+ * sequence is already used in the container, and throws an exception if it
+ * is the case. Otherwise, the name is not checked: the method is hence faster,
+ * but use it at your own risk!
+ *
+ * @param sequence The sequence to add.
+ * @param checkName Tell if the method must check the name of the sequence
+ * before adding it.
+ * @throw Exception If the sequence couldn't be added to the container.
+ */
+ virtual void addSequence(const Sequence& sequence, bool checkName = true) throw (Exception);
+
+ /**
+ * @brief Add a sequence to the container at a particular position.
+ *
+ * The sequence is copied into the container.
+ * If checkName is set to true, the method checks whether the name of the
+ * sequence is already used in the container, and throws an exception if it
+ * is the case. Otherwise, the name is not checked: the method is hence faster,
+ * but use it at your own risk!
+ *
+ * @param sequence The sequence to add.
+ * @param sequenceIndex The position where to insert the new sequence.
+ * All the following sequences will be pushed.
+ * @param checkName Tell if the method must check the name of the sequence
+ * before adding it.
+ * @throw Exception If the sequence couldn't be added to the container.
+ */
+ virtual void addSequence(const Sequence& sequence, size_t sequenceIndex, bool checkName = true) throw (Exception);
+
+ protected:
+
+ /**
+ * @name AbstractSequenceContainer methods.
+ *
+ * @{
+ */
+ Sequence& getSequence_(size_t i) throw (IndexOutOfBoundsException);
+ Sequence& getSequence_(const std::string& name) throw (SequenceNotFoundException);
+ /** @} */
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Container/VectorSiteContainer.cpp b/src/Bpp/Seq/Container/VectorSiteContainer.cpp
index e9371cd..331539a 100644
--- a/src/Bpp/Seq/Container/VectorSiteContainer.cpp
+++ b/src/Bpp/Seq/Container/VectorSiteContainer.cpp
@@ -5,7 +5,7 @@
//
/*
- Copyright or © or Copr. CNRS, (November 17, 2004)
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -331,7 +331,7 @@ Site* VectorSiteContainer::removeSite(size_t i) throw (IndexOutOfBoundsException
if (i >= getNumberOfSites())
throw IndexOutOfBoundsException("VectorSiteContainer::removeSite.", i, 0, getNumberOfSites() - 1);
Site* site = sites_[i];
- sites_.erase(sites_.begin() + i);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(i));
return site;
}
@@ -342,7 +342,7 @@ void VectorSiteContainer::deleteSite(size_t i) throw (IndexOutOfBoundsException)
if (i >= getNumberOfSites())
throw IndexOutOfBoundsException("VectorSiteContainer::deleteSite.", i, 0, getNumberOfSites() - 1);
delete sites_[i];
- sites_.erase(sites_.begin() + i);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(i));
}
/******************************************************************************/
@@ -355,7 +355,7 @@ void VectorSiteContainer::deleteSites(size_t siteIndex, size_t length) throw (In
{
delete sites_[i];
}
- sites_.erase(sites_.begin() + siteIndex, sites_.begin() + siteIndex + length);
+ sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), sites_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
}
/******************************************************************************/
@@ -446,7 +446,7 @@ void VectorSiteContainer::addSite(const Site& site, size_t siteIndex, bool check
}
// insert(begin() + pos, new Site(site));
- sites_.insert(sites_.begin() + siteIndex, dynamic_cast<Site*>(site.clone()));
+ sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), dynamic_cast<Site*>(site.clone()));
}
/******************************************************************************/
@@ -479,7 +479,7 @@ void VectorSiteContainer::addSite(const Site& site, size_t siteIndex, int positi
Site* copy = dynamic_cast<Site*>(site.clone());
copy->setPosition(position);
- sites_.insert(sites_.begin() + siteIndex, copy);
+ sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), copy);
}
/******************************************************************************/
@@ -636,13 +636,13 @@ Sequence* VectorSiteContainer::removeSequence(size_t i) throw (IndexOutOfBoundsE
}
// Now actualize names and comments:
- names_.erase(names_.begin() + i);
+ names_.erase(names_.begin() + static_cast<ptrdiff_t>(i));
if (comments_[i])
delete comments_[i];
- comments_.erase(comments_.begin() + i);
+ comments_.erase(comments_.begin() + static_cast<ptrdiff_t>(i));
// We remove the sequence, so the destruction of the sequence is up to the user:
// if (sequences_[i] != 0) delete sequences_[i];
- sequences_.erase(sequences_.begin() + i);
+ sequences_.erase(sequences_.begin() + static_cast<ptrdiff_t>(i));
return sequence;
}
@@ -667,13 +667,13 @@ void VectorSiteContainer::deleteSequence(size_t i) throw (IndexOutOfBoundsExcept
}
// Now actualize names and comments:
- names_.erase(names_.begin() + i);
+ names_.erase(names_.begin() + static_cast<ptrdiff_t>(i));
if (comments_[i])
delete comments_[i];
- comments_.erase(comments_.begin() + i);
+ comments_.erase(comments_.begin() + static_cast<ptrdiff_t>(i));
if (sequences_[i])
delete sequences_[i];
- sequences_.erase(sequences_.begin() + i);
+ sequences_.erase(sequences_.begin() + static_cast<ptrdiff_t>(i));
}
/******************************************************************************/
@@ -759,9 +759,9 @@ throw (Exception)
sites_[i]->addElement(pos, sequence.getValue(i));
}
// Actualize names and comments:
- names_.insert(names_.begin() + pos, sequence.getName());
- comments_.insert(comments_.begin() + pos, new Comments(sequence.getComments()));
- sequences_.insert(sequences_.begin() + pos, 0);
+ names_.insert(names_.begin() + static_cast<ptrdiff_t>(pos), sequence.getName());
+ comments_.insert(comments_.begin() + static_cast<ptrdiff_t>(pos), new Comments(sequence.getComments()));
+ sequences_.insert(sequences_.begin() + static_cast<ptrdiff_t>(pos), 0);
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/DistanceMatrix.h b/src/Bpp/Seq/DistanceMatrix.h
index fd431c4..8945736 100644
--- a/src/Bpp/Seq/DistanceMatrix.h
+++ b/src/Bpp/Seq/DistanceMatrix.h
@@ -53,10 +53,11 @@ namespace bpp
* @brief A Matrix class to store phylogenetic distances.
*/
class DistanceMatrix:
- public virtual RowMatrix<double>
+ public virtual Clonable
{
private:
+ RowMatrix<double> distances_;
std::vector<std::string> names_;
public:
@@ -69,7 +70,8 @@ class DistanceMatrix:
* @param names The names to use.
*/
DistanceMatrix(const std::vector<std::string>& names):
- RowMatrix<double>(names.size(), names.size()), names_(names)
+ distances_(names.size(), names.size()),
+ names_(names)
{
reset();
}
@@ -82,14 +84,16 @@ class DistanceMatrix:
* @param n The size of the matrix.
*/
DistanceMatrix(size_t n):
- RowMatrix<double>(n, n), names_(n)
+ distances_(n, n), names_(n)
{
resize(n);
}
virtual ~DistanceMatrix() {}
- DistanceMatrix(const DistanceMatrix& dist): RowMatrix<double>(dist), names_(dist.names_) {}
+ DistanceMatrix(const DistanceMatrix& dist):
+ distances_(dist.distances_),
+ names_(dist.names_) {}
DistanceMatrix& operator=(const DistanceMatrix& dist)
{
@@ -99,12 +103,14 @@ class DistanceMatrix:
{
for(size_t j = 0; j < n; ++j)
{
- operator()(i, j) = dist(i, j);
+ distances_(i, j) = dist(i, j);
}
}
names_ = dist.names_;
return *this;
}
+
+ DistanceMatrix* clone() const { return new DistanceMatrix(*this); }
public:
@@ -114,11 +120,11 @@ class DistanceMatrix:
void reset()
{
size_t n = size();
- for (size_t i = 0; i < n; i++)
+ for (size_t i = 0; i < n; ++i)
{
- for (size_t j = 0; j < n; j++)
+ for (size_t j = 0; j < n; ++j)
{
- operator()(i, j) = 0;
+ distances_(i, j) = 0;
}
}
}
@@ -184,7 +190,8 @@ class DistanceMatrix:
* @param n the new dimension of the matrix.
*/
void resize(size_t n) {
- RowMatrix<double>::resize(n, n);
+ //RowMatrix<double>::resize(n, n);
+ distances_.resize(n, n);
names_.resize(n);
for (size_t i = 0; i < n; ++i)
names_[i] = "Taxon " + TextTools::toString(i);
@@ -203,7 +210,8 @@ class DistanceMatrix:
{
size_t i = getNameIndex(iName);
size_t j = getNameIndex(jName);
- return operator()(i,j);
+ //return operator()(i,j);
+ return distances_(i,j);
}
/**
@@ -218,16 +226,27 @@ class DistanceMatrix:
{
size_t i = getNameIndex(iName);
size_t j = getNameIndex(jName);
- return operator()(i,j);
+ //return operator()(i,j);
+ return distances_(i,j);
}
virtual const double& operator()(size_t i, size_t j) const
{
- return RowMatrix<double>::operator()(i, j);
+ //return RowMatrix<double>::operator()(i, j);
+ return distances_(i, j);
}
virtual double& operator()(size_t i, size_t j)
{
- return RowMatrix<double>::operator()(i, j);
+ //return RowMatrix<double>::operator()(i, j);
+ return distances_(i, j);
+ }
+
+ virtual const Matrix<double>& asMatrix() const {
+ return distances_;
+ }
+
+ virtual Matrix<double>& asMatrix() {
+ return distances_;
}
};
diff --git a/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.cpp
new file mode 100644
index 0000000..b670804
--- /dev/null
+++ b/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.cpp
@@ -0,0 +1,117 @@
+//
+// File: AscidianMitochondrialGeneticCode.cpp
+// Created by: Fidel Botero and Julien Dutheil
+// Created on: Thu Jun 13 11:32:00 CET 2013
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+This software is a computer program whose purpose is to provide classes
+for sequences analysis.
+
+This software is governed by the CeCILL license under French law and
+abiding by the rules of distribution of free software. You can use,
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info".
+
+As a counterpart to the access to the source code and rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors have only limited
+liability.
+
+In this respect, the user's attention is drawn to the risks associated
+with loading, using, modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean that it is complicated to manipulate, and that also
+therefore means that it is reserved for developers and experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or
+data to be ensured and, more generally, to use and operate it in the
+same conditions as regards security.
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#include "AscidianMitochondrialGeneticCode.h"
+
+using namespace bpp;
+
+#include <iostream>
+
+using namespace std;
+
+AscidianMitochondrialGeneticCode::AscidianMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
+{
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 7; //AGA -> G
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 7; //AGG -> G
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 12; //ATA -> M
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode(); //unknown codon -> unknown amino acid
+}
+
diff --git a/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.h
new file mode 100644
index 0000000..63bbf29
--- /dev/null
+++ b/src/Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.h
@@ -0,0 +1,113 @@
+//
+// File: AscidianMitochondrialGeneticCode.h
+// Created by: Fidel Botero and Julien Dutheil
+// Created on: Thu Jun 13 11:32:00 CET 2013
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+This software is a computer program whose purpose is to provide classes
+for sequences analysis.
+
+This software is governed by the CeCILL license under French law and
+abiding by the rules of distribution of free software. You can use,
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info".
+
+As a counterpart to the access to the source code and rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors have only limited
+liability.
+
+In this respect, the user's attention is drawn to the risks associated
+with loading, using, modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean that it is complicated to manipulate, and that also
+therefore means that it is reserved for developers and experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or
+data to be ensured and, more generally, to use and operate it in the
+same conditions as regards security.
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#ifndef _ASCIDIANMITOCHONDRIALGENETICCODE_H_
+#define _ASCIDIANMITOCHONDRIALGENETICCODE_H_
+
+#include "GeneticCode.h"
+#include "../Alphabet/NucleicAlphabet.h"
+
+namespace bpp
+{
+
+/**
+ * @brief This class implements the ascidian mitochondrial genetic code as described on the NCBI
+ * web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG13
+ */
+class AscidianMitochondrialGeneticCode:
+ public virtual GeneticCode
+{
+ public:
+ AscidianMitochondrialGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~AscidianMitochondrialGeneticCode() {}
+
+ virtual AscidianMitochondrialGeneticCode* clone() const {
+ return new AscidianMitochondrialGeneticCode(*this);
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 2; }
+
+ std::vector<int> getStopCodonsAsInt() const {
+ // Stops are TAA (48) and TAG (50) only, as in isStop(); AGA (8) and
+ // AGG (10) are translated to glycine by this code.
+ std::vector<int> v(2);
+ v[0] = 48;
+ v[1] = 50;
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const {
+ // Same two stops as getStopCodonsAsInt(), spelled as codons.
+ // Every index written must stay below the vector size.
+ std::vector<std::string> v(2);
+ v[0] = "TAA";
+ v[1] = "TAG";
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 48 || state == 50);
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 48 || i == 50);
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 12 || state == 46 || state == 62);
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 12 || i == 46 || i == 62);
+ }
+
+};
+
+} //end of namespace bpp.
+
+#endif //_ASCIDIANMITOCHONDRIALGENETICCODE_H_
+
diff --git a/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp
index eeab5fd..b4af392 100644
--- a/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp
@@ -4,7 +4,7 @@
// Created on: 14 11:31:27 CET 2005
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -37,8 +37,6 @@ knowledge of the CeCILL license and that you accept its terms.
*/
#include "EchinodermMitochondrialGeneticCode.h"
-#include "../Alphabet/EchinodermMitochondrialCodonAlphabet.h"
-#include "../Alphabet/ProteicAlphabet.h"
using namespace bpp;
@@ -46,122 +44,73 @@ using namespace bpp;
using namespace std;
-EchinodermMitochondrialGeneticCode::EchinodermMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode()
+EchinodermMitochondrialGeneticCode::EchinodermMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
{
- codonAlphabet_ = new EchinodermMitochondrialCodonAlphabet(alpha);
- proteicAlphabet_ = new ProteicAlphabet();
-}
-
-EchinodermMitochondrialGeneticCode::~EchinodermMitochondrialGeneticCode()
-{
- delete codonAlphabet_;
- delete proteicAlphabet_;
-}
-
-int EchinodermMitochondrialGeneticCode::translate(int state) const throw (Exception)
-{
- if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode();
- vector<int> positions = codonAlphabet_->getPositions(state);
- switch(positions[0])
- {
- //First position:
- case 0 : //A
- switch(positions[1])
- {
- //Second position:
- case 0 : //AA
- switch(positions[2])
- {
- //Third position:
- case 2 : return proteicAlphabet_->charToInt("K"); //Lysine
- case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine
- }
- case 1 : //AC
- return proteicAlphabet_->charToInt("T"); //Threonine
- case 2 : //AG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 1 : case 2 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine
- }
- case 3 : //AT
- switch(positions[2])
- {
- //Third position:
- case 2: return proteicAlphabet_->charToInt("M"); //Methionine
- case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine
- }
- }
- case 1 : //C
- switch(positions[1])
- {
- //Second position:
- case 0 : //CA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine
- case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine
- }
- case 1 : //CC
- return proteicAlphabet_->charToInt("P"); //Proline
- case 2 : //CG
- return proteicAlphabet_->charToInt("R"); //Arginine
- case 3 : //CT
- return proteicAlphabet_->charToInt("L"); //Leucine
- }
- case 2 : //G
- switch(positions[1])
- {
- //Second position:
- case 0 : //GA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid
- case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid
- }
- case 1 : //GC
- return proteicAlphabet_->charToInt("A"); //Alanine
- case 2 : //GG
- return proteicAlphabet_->charToInt("G"); //Glycine
- case 3 : //GT
- return proteicAlphabet_->charToInt("V"); //Valine
- }
- case 3 : //T(U)
- switch(positions[1])
- {
- //Second position:
- case 0 : //TA
- switch(positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TAA"); //Stop codon
- case 2 : throw StopCodonException("", "TAG"); //Stop codon
- case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine
- }
- case 1 : //TC
- return proteicAlphabet_->charToInt("S"); //Serine
- case 2 : //TG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane
- case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine
- }
- case 3 : //TT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine
- case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine
- }
- }
- }
- throw BadIntException(state, "EchinodermMitochondrialGeneticCode::translate", codonAlphabet_);
-}
-
-string EchinodermMitochondrialGeneticCode::translate(const string & state) const throw (Exception) {
- return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state)));
+ tlnTable_[0] = 2; //AAA -> N
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 15; //AGA -> S
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 15; //AGG -> S
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 9; //ATA -> I
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode();
}
diff --git a/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h
index bddfe20..e114dd5 100644
--- a/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -51,21 +51,57 @@ namespace bpp
* Mitochondrial genetic code as describe on the NCBI website:
* http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG9
*/
-
class EchinodermMitochondrialGeneticCode:
- public GeneticCode
+ public virtual GeneticCode
{
public:
- EchinodermMitochondrialGeneticCode(const NucleicAlphabet * alpha);
- virtual ~EchinodermMitochondrialGeneticCode();
-
- public:
- int translate(int state) const throw (Exception);
- std::string translate(const std::string & state) const throw (Exception);
- Sequence * translate(const Sequence & sequence) const throw (Exception)
- {
- return GeneticCode::translate(sequence);
- }
+ EchinodermMitochondrialGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~EchinodermMitochondrialGeneticCode() {}
+
+ virtual EchinodermMitochondrialGeneticCode* clone() const {
+ return new EchinodermMitochondrialGeneticCode(*this); // heap-allocated copy made with the copy constructor
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 2; } // integer literal (was 2.); the stop states are those accepted by isStop(): 48 and 50
+
+ std::vector<int> getStopCodonsAsInt() const {
+ std::vector<int> v(2); // two elements: valid indices are 0 and 1
+ v[0] = 48; // TAA
+ v[1] = 50; // TAG
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const {
+ std::vector<std::string> v(2); // two elements: valid indices are 0 and 1
+ v[0] = "TAA"; // state 48
+ v[1] = "TAG"; // state 50
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ codonAlphabet_.intToChar(state); // result unused: called only so that an invalid state throws
+ const bool isTaaOrTag = (state == 48) || (state == 50);
+ return isTaaOrTag;
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ switch (codonAlphabet_.charToInt(state)) { case 48: case 50: return true; } // TAA, TAG
+ return false;
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ codonAlphabet_.intToChar(state); // validation only: throws for a state outside the codon alphabet
+ const int gtg = 46; // GTG is the only alternative start handled here
+ return state == gtg;
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ const int gtg = 46; // GTG is the only alternative start handled here
+ return codonAlphabet_.charToInt(state) == gtg;
+ }
+
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/GeneticCode/GeneticCode.cpp b/src/Bpp/Seq/GeneticCode/GeneticCode.cpp
index e8b780c..13a60fa 100644
--- a/src/Bpp/Seq/GeneticCode/GeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/GeneticCode.cpp
@@ -52,13 +52,35 @@ StopCodonException::StopCodonException(const std::string& text, const std::strin
/**********************************************************************************************/
+int GeneticCode::translate(int state) const throw (BadIntException, Exception)
+{
+ // Stop codons have no amino-acid image, so they are reported before the table is consulted.
+ if (isStop(state))
+ throw StopCodonException("GeneticCode::translate().", codonAlphabet_.intToChar(state));
+ const map<int, int>::const_iterator entry = tlnTable_.find(state);
+ if (entry != tlnTable_.end())
+ return entry->second;
+ // No translation was registered for this state in tlnTable_.
+ throw BadIntException(state, "GeneticCode::translate().");
+}
+
+/**********************************************************************************************/
+
+std::string GeneticCode::translate(const std::string& state) const throw (BadCharException, Exception)
+{
+ // codon text -> codon state -> amino-acid state -> amino-acid text
+ return proteicAlphabet_.intToChar(translate(codonAlphabet_.charToInt(state)));
+}
+
+/**********************************************************************************************/
+
vector<int> GeneticCode::getSynonymous(int aminoacid) const throw (BadIntException)
{
// test:
- proteicAlphabet_->intToChar(aminoacid);
+ proteicAlphabet_.intToChar(aminoacid);
vector<int> synonymes;
- for (unsigned int i = 0; i < codonAlphabet_->getSize(); ++i)
+ for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
{
try
{
@@ -76,15 +98,15 @@ vector<int> GeneticCode::getSynonymous(int aminoacid) const throw (BadIntExcepti
std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid) const throw (BadCharException)
{
// test:
- int aa = proteicAlphabet_->charToInt(aminoacid);
+ int aa = proteicAlphabet_.charToInt(aminoacid);
vector<string> synonymes;
- for (unsigned int i = 0; i < codonAlphabet_->getSize(); ++i)
+ for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
{
try
{
if (translate(i) == aa)
- synonymes.push_back(codonAlphabet_->intToChar(i));
+ synonymes.push_back(codonAlphabet_.intToChar(i));
}
catch (StopCodonException)
{ }
@@ -96,10 +118,10 @@ std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid
bool GeneticCode::isFourFoldDegenerated(int val) const
{
- if (codonAlphabet_->isStop(val))
+ if (isStop(val))
return false;
- vector<int> codon = codonAlphabet_->getPositions(val);
+ vector<int> codon = codonAlphabet_.getPositions(val);
int acid = translate(val);
// test all the substitution on third codon position
@@ -109,10 +131,9 @@ bool GeneticCode::isFourFoldDegenerated(int val) const
continue;
vector<int> mutcodon = codon;
mutcodon[2] = an;
- int intcodon = codonAlphabet_->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
- if (codonAlphabet_->isStop(intcodon))
+ int intcodon = codonAlphabet_.getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
+ if (isStop(intcodon))
return false;
- ;
int altacid = translate(intcodon);
if (altacid != acid) // if non-synonymous
{
@@ -134,9 +155,9 @@ Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForI
// Look for AUG(or ATG) codon:
if (lookForInitCodon)
{
- for (unsigned int i = 0; i < sequence.size(); i++)
+ for (size_t i = 0; i < sequence.size(); i++)
{
- vector<int> pos = codonAlphabet_->getPositions(sequence[i]);
+ vector<int> pos = codonAlphabet_.getPositions(sequence[i]);
if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2)
{
initPos = includeInitCodon ? i : i + 1;
@@ -147,7 +168,7 @@ Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForI
// Look for stop codon:
for (size_t i = initPos; i < sequence.size(); i++)
{
- if (codonAlphabet_->isStop(sequence[i]))
+ if (isStop(sequence[i]))
{
stopPos = i;
break;
@@ -159,7 +180,7 @@ Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForI
// Look for AUG(or ATG) codon:
if (lookForInitCodon)
{
- for (unsigned int i = 0; i < sequence.size() - 2; i++)
+ for (size_t i = 0; i < sequence.size() - 2; i++)
{
if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2)
{
@@ -169,13 +190,13 @@ Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForI
}
}
// Look for stop codon:
- const NucleicAlphabet* nucAlpha = codonAlphabet_->getNucleicAlphabet();
+ const NucleicAlphabet* nucAlpha = codonAlphabet_.getNucleicAlphabet();
for (size_t i = initPos; i < sequence.size() - 2; i += 3)
{
string codon = nucAlpha->intToChar(sequence[i])
+ nucAlpha->intToChar(sequence[i + 1])
+ nucAlpha->intToChar(sequence[i + 2]);
- if (codonAlphabet_->isStop(codon))
+ if (isStop(codon))
{
stopPos = i;
break;
diff --git a/src/Bpp/Seq/GeneticCode/GeneticCode.h b/src/Bpp/Seq/GeneticCode/GeneticCode.h
index 294144f..e3b1aab 100644
--- a/src/Bpp/Seq/GeneticCode/GeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/GeneticCode.h
@@ -77,45 +77,36 @@ namespace bpp
* @see CodonAlphabet, ProteicAlphabet
*/
class GeneticCode:
- public AbstractTransliterator
+ public AbstractTransliterator,
+ public virtual Clonable
{
protected:
- const CodonAlphabet* codonAlphabet_;
- const ProteicAlphabet* proteicAlphabet_;
+ CodonAlphabet codonAlphabet_;
+ ProteicAlphabet proteicAlphabet_;
+ std::map<int, int> tlnTable_;
public:
- GeneticCode():
- AbstractTransliterator() ,
- codonAlphabet_(0),
- proteicAlphabet_(0)
+ GeneticCode(const NucleicAlphabet* alphabet):
+ AbstractTransliterator(),
+ codonAlphabet_(alphabet),
+ proteicAlphabet_(),
+ tlnTable_()
{}
- GeneticCode(const GeneticCode& gc):
- AbstractTransliterator(gc),
- codonAlphabet_(gc.codonAlphabet_),
- proteicAlphabet_(gc.proteicAlphabet_)
- {}
-
- GeneticCode& operator=(const GeneticCode& gc)
- {
- AbstractTransliterator::operator=(gc);
- codonAlphabet_ = gc.codonAlphabet_;
- proteicAlphabet_ = gc.proteicAlphabet_;
- return *this;
- }
-
virtual ~GeneticCode() {}
+ virtual GeneticCode* clone() const = 0;
+
public:
/**
* @name Methods form the Transliterator interface.
*
* @{
*/
- const Alphabet* getSourceAlphabet() const { return codonAlphabet_; }
- const Alphabet* getTargetAlphabet() const { return proteicAlphabet_; }
- virtual int translate(int state) const throw (BadIntException, Exception) = 0;
- virtual std::string translate(const std::string& state) const throw (BadCharException, Exception) = 0;
+ const CodonAlphabet* getSourceAlphabet() const { return &codonAlphabet_; } // address of the member alphabet, not a copy
+ const ProteicAlphabet* getTargetAlphabet() const { return &proteicAlphabet_; } // address of the member alphabet, not a copy
+ virtual int translate(int state) const throw (BadIntException, Exception);
+ virtual std::string translate(const std::string& state) const throw (BadCharException, Exception);
virtual Sequence* translate(const Sequence& sequence) const throw (Exception)
{
return AbstractTransliterator::translate(sequence);
@@ -128,15 +119,110 @@ namespace bpp
*
* @{
*/
+
+ /**
+ * @return The number of stop codons.
+ */
+ virtual size_t getNumberOfStopCodons() const = 0;
+
+ /**
+ * @return A vector with all int codes for stop codons.
+ */
+ virtual std::vector<int> getStopCodonsAsInt() const = 0;
+
+ /**
+ * @return A vector with all char codes for stop codons.
+ */
+ virtual std::vector<std::string> getStopCodonsAsChar() const = 0;
+
+ /**
+ * @brief Tells if a particular codon is a stop codon.
+ *
+ * @param state The numeric code for the state to test.
+ * @return True if the state corresponds to a stop codon.
+ * @throw BadIntException if the state is not supported by the alphabet.
+ */
+ virtual bool isStop(int state) const throw (BadIntException) = 0;
+
+ /**
+ * @brief Tells if a particular codon is a stop codon.
+ *
+ * @param state The character code for the state to test.
+ * @return True if the state corresponds to a stop codon.
+ * @throw BadCharException if the state is not supported by the alphabet.
+ */
+ virtual bool isStop(const std::string& state) const throw (BadCharException) = 0;
+
+ /**
+ * @brief Tells if a particular codon is a start codon.
+ *
+ * @param state The numeric code for the state to test.
+ * @return True if the state corresponds to a start codon.
+ * @throw BadIntException if the state is not supported by the alphabet.
+ */
+ virtual bool isStart(int state) const throw (BadIntException) {
+ codonAlphabet_.intToChar(state); // result unused: called only so that an invalid state throws
+ const int startState = 14; // the single state treated as a regular start codon
+ return state == startState;
+ }
+
+ /**
+ * @brief Tells if a particular codon is a start codon.
+ *
+ * @param state The character code for the state to test.
+ * @return True if the state corresponds to a start codon.
+ * @throw BadCharException if the state is not supported by the alphabet.
+ */
+ virtual bool isStart(const std::string& state) const throw (BadCharException) {
+ return isStart(codonAlphabet_.charToInt(state)); // convert the text to its numeric state, then defer to the int overload
+ }
+
+ /**
+ * @brief Tells if a particular codon is an alternative start codon.
+ *
+ * @param state The numeric code for the state to test.
+ * @return True if the state corresponds to an alternative start codon.
+ * @throw BadIntException if the state is not supported by the alphabet.
+ */
+ virtual bool isAltStart(int state) const throw (BadIntException) = 0;
+
+ /**
+ * @brief Tells if a particular codon is an alternative start codon.
+ *
+ * @param state The character code for the state to test.
+ * @return True if the state corresponds to an alternative start codon.
+ * @throw BadCharException if the state is not supported by the alphabet.
+ */
+ virtual bool isAltStart(const std::string& state) const throw (BadCharException) = 0;
+
+ /**
+ * @brief Tell if two codons are synonymous, that is, if they encode the same amino-acid.
+ *
+ * @param i The numeric code for the first codon.
+ * @param j The numeric code for the second codon.
+ * @return True if the two codons are synonymous.
+ * @throw BadIntException if at least one of the states is not supported by the alphabet.
+ */
bool areSynonymous(int i, int j) const throw (BadIntException)
{
return (translate(i) == translate(j));
}
+
+ /**
+ * @brief Tell if two codons are synonymous, that is, if they encode the same amino-acid.
+ *
+ * @param i The character code for the first codon.
+ * @param j The character code for the second codon.
+ * @return True if the two codons are synonymous.
+ * @throw BadCharException if at least one of the states is not supported by the alphabet.
+ */
bool areSynonymous(const std::string & i, const std::string & j) const throw (BadCharException)
{
return (translate(i) == translate(j));
}
+
std::vector<int> getSynonymous(int aminoacid) const throw (BadIntException);
+
std::vector<std::string> getSynonymous(const std::string & aminoacid) const throw (BadCharException);
/**
diff --git a/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp
index df9c867..b52f8a2 100644
--- a/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -38,8 +38,6 @@ knowledge of the CeCILL license and that you accept its terms.
*/
#include "InvertebrateMitochondrialGeneticCode.h"
-#include "../Alphabet/InvertebrateMitochondrialCodonAlphabet.h"
-#include "../Alphabet/ProteicAlphabet.h"
using namespace bpp;
@@ -47,124 +45,73 @@ using namespace bpp;
using namespace std;
-InvertebrateMitochondrialGeneticCode::InvertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode()
+InvertebrateMitochondrialGeneticCode::InvertebrateMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
{
- codonAlphabet_ = new InvertebrateMitochondrialCodonAlphabet(alpha);
- proteicAlphabet_ = new ProteicAlphabet();
-}
-
-InvertebrateMitochondrialGeneticCode::~InvertebrateMitochondrialGeneticCode()
-{
- delete codonAlphabet_;
- delete proteicAlphabet_;
-}
-
-int InvertebrateMitochondrialGeneticCode::translate(int state) const throw (Exception)
-{
- if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode();
- vector<int> positions = codonAlphabet_->getPositions(state);
- switch(positions[0])
- {
- //First position:
- case 0 : //A
- switch(positions[1])
- {
- //Second position:
- case 0 : //AA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine
- case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine
- }
- case 1 : //AC
- return proteicAlphabet_->charToInt("T"); //Threonine
- case 2 : //AG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("S"); //Serine
- case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine
- }
- case 3 : //AT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2: return proteicAlphabet_->charToInt("M"); //Methionine
- case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine
- }
- }
- case 1 : //C
- switch(positions[1])
- {
- //Second position:
- case 0 : //CA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine
- case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine
- }
- case 1 : //CC
- return proteicAlphabet_->charToInt("P"); //Proline
- case 2 : //CG
- return proteicAlphabet_->charToInt("R"); //Arginine
- case 3 : //CT
- return proteicAlphabet_->charToInt("L"); //Leucine
- }
- case 2 : //G
- switch(positions[1])
- {
- //Second position:
- case 0 : //GA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid
- case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid
- }
- case 1 : //GC
- return proteicAlphabet_->charToInt("A"); //Alanine
- case 2 : //GG
- return proteicAlphabet_->charToInt("G"); //Glycine
- case 3 : //GT
- return proteicAlphabet_->charToInt("V"); //Valine
- }
- case 3 : //T(U)
- switch(positions[1])
- {
- //Second position:
- case 0 : //TA
- switch(positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TAA"); //Stop codon
- case 2 : throw StopCodonException("", "TAG"); //Stop codon
- case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine
- }
- case 1 : //TC
- return proteicAlphabet_->charToInt("S"); //Serine
- case 2 : //TG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane
- case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine
- }
- case 3 : //TT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine
- case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine
- }
- }
- }
- throw BadIntException(state, "InvertebrateMitochondrialGeneticCode::translate", codonAlphabet_);
-}
-
-string InvertebrateMitochondrialGeneticCode::translate(const string & state) const throw (Exception)
-{
- return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state)));
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 15; //AGA -> S
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 15; //AGG -> S
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 12; //ATA -> M
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode();
}
diff --git a/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h
index 2309852..d6b07a4 100644
--- a/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -52,21 +52,57 @@ namespace bpp
* Mitochondrial genetic code as describe on the NCBI website:
* http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG5
*/
-
class InvertebrateMitochondrialGeneticCode:
- public GeneticCode
+ public virtual GeneticCode
{
public:
- InvertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha);
- virtual ~InvertebrateMitochondrialGeneticCode();
+ InvertebrateMitochondrialGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~InvertebrateMitochondrialGeneticCode() {}
- public:
- int translate(int state) const throw (Exception);
- std::string translate(const std::string & state) const throw (Exception);
- Sequence * translate(const Sequence & sequence) const throw (Exception)
- {
- return GeneticCode::translate(sequence);
- }
+ virtual InvertebrateMitochondrialGeneticCode* clone() const {
+ return new InvertebrateMitochondrialGeneticCode(*this); // heap-allocated copy made with the copy constructor
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 2; } // integer literal (was 2.); the stop states are those accepted by isStop(): 48 and 50
+
+ std::vector<int> getStopCodonsAsInt() const {
+ std::vector<int> v(2); // two elements: valid indices are 0 and 1
+ v[0] = 48; // TAA
+ v[1] = 50; // TAG
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const {
+ std::vector<std::string> v(2); // two elements: valid indices are 0 and 1
+ v[0] = "TAA"; // state 48
+ v[1] = "TAG"; // state 50
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ codonAlphabet_.intToChar(state); // validation only: throws for a state outside the codon alphabet
+ switch (state) { case 48: case 50: return true; } // TAA, TAG
+ return false;
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ const int code = codonAlphabet_.charToInt(state); // codon text -> numeric codon state
+ return code == 48 || code == 50; // TAA or TAG
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ codonAlphabet_.intToChar(state); // validation only: throws for a state outside the codon alphabet
+ switch (state) { case 12: case 13: case 15: case 46: case 62: return true; } // ATA, ATC, ATT, GTG, TTG
+ return false;
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ switch (codonAlphabet_.charToInt(state)) { case 12: case 13: case 15: case 46: case 62: return true; } // ATA, ATC, ATT, GTG, TTG
+ return false;
+ }
+
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.cpp
new file mode 100644
index 0000000..0dd6adf
--- /dev/null
+++ b/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.cpp
@@ -0,0 +1,117 @@
+//
+// File: MoldMitochondrialGeneticCode.cpp
+// Created by: Julien Dutheil
+// Created on: Thu Jun 13 11:52 CET 2013
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+This software is a computer program whose purpose is to provide classes
+for sequences analysis.
+
+This software is governed by the CeCILL license under French law and
+abiding by the rules of distribution of free software. You can use,
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info".
+
+As a counterpart to the access to the source code and rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors have only limited
+liability.
+
+In this respect, the user's attention is drawn to the risks associated
+with loading, using, modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean that it is complicated to manipulate, and that also
+therefore means that it is reserved for developers and experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or
+data to be ensured and, more generally, to use and operate it in the
+same conditions as regards security.
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+#include "MoldMitochondrialGeneticCode.h"
+
+using namespace bpp;
+
+#include <iostream>
+
+using namespace std;
+
+MoldMitochondrialGeneticCode::MoldMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
+{ // Fills tlnTable_: key = codon state, value = amino-acid state; each line's comment names the codon and its amino acid, and -99 marks a stop codon.
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 1; //AGA -> R
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 1; //AGG -> R
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 9; //ATA -> I
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode(); // the unknown codon maps to the unknown amino acid
+}
+
diff --git a/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.h
new file mode 100644
index 0000000..0f1d7ab
--- /dev/null
+++ b/src/Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.h
@@ -0,0 +1,111 @@
+//
+// File: MoldMitochondrialGeneticCode.h
+// Created by: Julien Dutheil
+// Created on: Thu Jun 13 11:52 CET 2013
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+This software is a computer program whose purpose is to provide classes
+for sequences analysis.
+
+This software is governed by the CeCILL license under French law and
+abiding by the rules of distribution of free software. You can use,
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info".
+
+As a counterpart to the access to the source code and rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors have only limited
+liability.
+
+In this respect, the user's attention is drawn to the risks associated
+with loading, using, modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean that it is complicated to manipulate, and that also
+therefore means that it is reserved for developers and experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or
+data to be ensured and, more generally, to use and operate it in the
+same conditions as regards security.
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+
+#ifndef _MOLDMITOCHONDRIALGENETICCODE_H_
+#define _MOLDMITOCHONDRIALGENETICCODE_H_
+
+#include "GeneticCode.h"
+#include "../Alphabet/NucleicAlphabet.h"
+
+namespace bpp
+{
+
+/**
+ * @brief This class implements the mold, protozoan, and coelenterate mitochondrial code
+ * and the Mycoplasma/Spiroplasma code as described on the NCBI website:
+ * http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG4
+ */
+class MoldMitochondrialGeneticCode:
+ public virtual GeneticCode
+{
+ public:
+ MoldMitochondrialGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~MoldMitochondrialGeneticCode() {}
+
+ virtual MoldMitochondrialGeneticCode* clone() const {
+ return new MoldMitochondrialGeneticCode(*this);
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 2; } // TAA and TAG
+ // Returns the integer codes of the two stop codons (48 and 50, as tested by isStop()).
+ std::vector<int> getStopCodonsAsInt() const {
+ std::vector<int> v(2);
+ v[0] = 48; // was v[1]: indices are 0-based, v[2] was out of bounds
+ v[1] = 50;
+ return v;
+ }
+ // Returns the two stop codons as three-letter strings.
+ std::vector<std::string> getStopCodonsAsChar() const {
+ std::vector<std::string> v(2);
+ v[0] = "TAA"; // was v[1]: indices are 0-based, v[2] was out of bounds
+ v[1] = "TAG";
+ return v;
+ }
+ // Returns true if the integer codon state is 48 or 50.
+ bool isStop(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 48 || state == 50);
+ }
+ // Same test as isStop(int), after converting the codon string with charToInt().
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 48 || i == 50);
+ }
+ // Returns true if the integer codon state is one of the listed alternative start codes.
+ bool isAltStart(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 12 || state == 13 || state == 15 || state == 30 || state == 46 || state == 60 || state == 62);
+ }
+ // Same test as isAltStart(int), after converting the codon string with charToInt().
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 12 || i == 13 || i == 15 || i == 30 || i == 46 || i == 60 || i == 62);
+ }
+
+};
+
+} //end of namespace bpp.
+
+#endif //_MOLDMITOCHONDRIALGENETICCODE_H_
+
diff --git a/src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp
index da6fba8..de6bcb5 100644
--- a/src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/StandardGeneticCode.cpp
@@ -38,8 +38,6 @@ knowledge of the CeCILL license and that you accept its terms.
*/
#include "StandardGeneticCode.h"
-#include "../Alphabet/StandardCodonAlphabet.h"
-#include "../Alphabet/ProteicAlphabet.h"
using namespace bpp;
@@ -47,126 +45,73 @@ using namespace bpp;
using namespace std;
-StandardGeneticCode::StandardGeneticCode(const NucleicAlphabet* alpha) : GeneticCode()
+StandardGeneticCode::StandardGeneticCode(const NucleicAlphabet* alphabet):
+ GeneticCode(alphabet)
{
- codonAlphabet_ = new StandardCodonAlphabet(alpha);
- proteicAlphabet_ = new ProteicAlphabet();
-}
-
-
-StandardGeneticCode::~StandardGeneticCode()
-{
- delete codonAlphabet_;
- delete proteicAlphabet_;
-}
-
-int StandardGeneticCode::translate(int state) const throw (Exception)
-{
- if (state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode();
- vector<int> positions = codonAlphabet_->getPositions(state);
- switch (positions[0])
- {
- //First position:
- case 0 : //A
- switch (positions[1])
- {
- //Second position:
- case 0 : //AA
- switch (positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine
- case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine
- }
- case 1 : //AC
- return proteicAlphabet_->charToInt("T"); //Threonine
- case 2 : //AG
- switch (positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("R"); //Arginine
- case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine
- }
- case 3 : //AT
- switch (positions[2])
- {
- //Third position:
- case 2: return proteicAlphabet_->charToInt("M"); //Methionine
- case 0 : case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine
- }
- }
- case 1 : //C
- switch (positions[1])
- {
- //Second position:
- case 0 : //CA
- switch (positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine
- case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine
- }
- case 1 : //CC
- return proteicAlphabet_->charToInt("P"); //Proline
- case 2 : //CG
- return proteicAlphabet_->charToInt("R"); //Arginine
- case 3 : //CT
- return proteicAlphabet_->charToInt("L"); //Leucine
- }
- case 2 : //G
- switch (positions[1])
- {
- //Second position:
- case 0 : //GA
- switch (positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid
- case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid
- }
- case 1 : //GC
- return proteicAlphabet_->charToInt("A"); //Alanine
- case 2 : //GG
- return proteicAlphabet_->charToInt("G"); //Glycine
- case 3 : //GT
- return proteicAlphabet_->charToInt("V"); //Valine
- }
- case 3 : //T(U)
- switch (positions[1])
- {
- //Second position:
- case 0 : //TA
- switch (positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TAA"); //Stop codon
- case 2 : throw StopCodonException("", "TAG"); //Stop codon
- case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine
- }
- case 1 : //TC
- return proteicAlphabet_->charToInt("S"); //Serine
- case 2 : //TG
- switch (positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TGA"); //Stop codon
- case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane
- case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine
- }
- case 3 : //TT
- switch (positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine
- case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine
- }
- }
- }
- throw BadIntException(state, "StandardGeneticCode::translate", codonAlphabet_);
-}
-
-string StandardGeneticCode::translate(const string& state) const throw (Exception)
-{
- return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state)));
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 1; //AGA -> R
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 1; //AGG -> R
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 9; //ATA -> I
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; // TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; // TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = -99; // TGA -> STOP
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode();
}
diff --git a/src/Bpp/Seq/GeneticCode/StandardGeneticCode.h b/src/Bpp/Seq/GeneticCode/StandardGeneticCode.h
index 3f3eff4..f716e55 100644
--- a/src/Bpp/Seq/GeneticCode/StandardGeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/StandardGeneticCode.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -50,21 +50,59 @@ namespace bpp
* @brief This class implements the standard genetic code as describe on the NCBI
* web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG1
*/
-
class StandardGeneticCode:
- public GeneticCode
+ public virtual GeneticCode
{
public:
- StandardGeneticCode(const NucleicAlphabet * alpha);
- virtual ~StandardGeneticCode();
+ StandardGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~StandardGeneticCode() {}
- public:
- int translate(int state) const throw (Exception);
- std::string translate(const std::string & state) const throw (Exception);
- Sequence * translate(const Sequence & sequence) const throw (Exception)
- {
- return GeneticCode::translate(sequence);
- }
+ virtual StandardGeneticCode* clone() const {
+ return new StandardGeneticCode(*this);
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 3; } // TAA, TAG, TGA; integer literal avoids a double-to-size_t conversion
+
+ std::vector<int> getStopCodonsAsInt() const { // integer codes of the three stop codons
+ std::vector<int> v(3);
+ v[0] = 48; // TAA
+ v[1] = 50; // TAG
+ v[2] = 56; // TGA (index was written as the octal literal 02)
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const {
+ std::vector<std::string> v(3);
+ v[0] = "TAA";
+ v[1] = "TAG";
+ v[2] = "TGA";
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 48 || state == 50 || state == 56);
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 48 || i == 50 || i == 56);
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 62 || state == 30);
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 62 || i == 30);
+ }
+
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp
index 6915a18..f81ce00 100644
--- a/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -38,8 +38,6 @@ knowledge of the CeCILL license and that you accept its terms.
*/
#include "VertebrateMitochondrialGeneticCode.h"
-#include "../Alphabet/VertebrateMitochondrialCodonAlphabet.h"
-#include "../Alphabet/ProteicAlphabet.h"
using namespace bpp;
@@ -47,126 +45,73 @@ using namespace bpp;
using namespace std;
-VertebrateMitochondrialGeneticCode::VertebrateMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode()
+VertebrateMitochondrialGeneticCode::VertebrateMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
{
- codonAlphabet_ = new VertebrateMitochondrialCodonAlphabet(alpha);
- proteicAlphabet_ = new ProteicAlphabet();
-}
-
-
-VertebrateMitochondrialGeneticCode::~VertebrateMitochondrialGeneticCode()
-{
- delete codonAlphabet_;
- delete proteicAlphabet_;
-}
-
-int VertebrateMitochondrialGeneticCode::translate(int state) const throw (Exception)
-{
- if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode();
- vector<int> positions = codonAlphabet_->getPositions(state);
- switch(positions[0])
- {
- //First position:
- case 0 : //A
- switch(positions[1])
- {
- //Second position:
- case 0 : //AA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine
- case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine
- }
- case 1 : //AC
- return proteicAlphabet_->charToInt("T"); //Threonine
- case 2 : //AG
- switch(positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "AGA"); //Stop
- case 2 : throw StopCodonException("", "AGG"); //Stop
- case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine
- }
- case 3 : //AT
- switch(positions[2])
- {
- //Third position:
- case 2 : case 0 : return proteicAlphabet_->charToInt("M"); //Methionine
- case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine
- }
- }
- case 1 : //C
- switch(positions[1])
- {
- //Second position:
- case 0 : //CA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine
- case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine
- }
- case 1 : //CC
- return proteicAlphabet_->charToInt("P"); //Proline
- case 2 : //CG
- return proteicAlphabet_->charToInt("R"); //Arginine
- case 3 : //CT
- return proteicAlphabet_->charToInt("L"); //Leucine
- }
- case 2 : //G
- switch(positions[1])
- {
- //Second position:
- case 0 : //GA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid
- case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid
- }
- case 1 : //GC
- return proteicAlphabet_->charToInt("A"); //Alanine
- case 2 : //GG
- return proteicAlphabet_->charToInt("G"); //Glycine
- case 3 : //GT
- return proteicAlphabet_->charToInt("V"); //Valine
- }
- case 3 : //T(U)
- switch(positions[1])
- {
- //Second position:
- case 0 : //TA
- switch(positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TAA"); //Stop codon
- case 2 : throw StopCodonException("", "TAG"); //Stop codon
- case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine
- }
- case 1 : //TC
- return proteicAlphabet_->charToInt("S"); //Serine
- case 2 : //TG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane
- case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine
- }
- case 3 : //TT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine
- case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine
- }
- }
- }
- throw BadIntException(state, "VertebrateMitochondrialGeneticCode::translate", codonAlphabet_);
-}
-
-string VertebrateMitochondrialGeneticCode::translate(const string & state) const throw (Exception)
-{
- return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state)));
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = -99; //AGA -> STOP
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = -99; //AGG -> STOP
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 12; //ATA -> M
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 10; //CTA -> L
+ tlnTable_[29] = 10; //CTC -> L
+ tlnTable_[30] = 10; //CTG -> L
+ tlnTable_[31] = 10; //CTT -> L
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode();
}
diff --git a/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h
index a597fa0..b104338 100644
--- a/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -50,21 +50,61 @@ namespace bpp
* @brief This class implements the vertebrate mitochondrial genetic code as describe on the NCBI
* web site: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG2
*/
-
class VertebrateMitochondrialGeneticCode:
- public GeneticCode
+ public virtual GeneticCode
{
public:
- VertebrateMitochondrialGeneticCode(const NucleicAlphabet* alpha);
- virtual ~VertebrateMitochondrialGeneticCode();
+ VertebrateMitochondrialGeneticCode(const NucleicAlphabet* alphabet);
+
+ virtual ~VertebrateMitochondrialGeneticCode() {}
- public:
- int translate(int state) const throw (Exception);
- std::string translate(const std::string& state) const throw (Exception);
- Sequence* translate(const Sequence& sequence) const throw (Exception)
- {
- return GeneticCode::translate(sequence);
- }
+ virtual VertebrateMitochondrialGeneticCode* clone() const {
+ return new VertebrateMitochondrialGeneticCode(*this);
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 4; } // AGA, AGG, TAA, TAG; integer literal avoids a double-to-size_t conversion
+
+ std::vector<int> getStopCodonsAsInt() const {
+ std::vector<int> v(4);
+ v[0] = 8;
+ v[1] = 10;
+ v[2] = 48;
+ v[3] = 50;
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const { // the four stop codons as three-letter strings
+ std::vector<std::string> v(4);
+ v[0] = "AGA";
+ v[1] = "AGG";
+ v[2] = "TAA";
+ v[3] = "TAG"; // was v[4]: out of bounds for a 4-element vector, and left v[3] empty
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 48 || state == 50 || state == 8 || state == 10);
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 48 || i == 50 || i == 8 || i == 10);
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 12 || state == 13 || state == 15 || state == 46);
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 12 || i == 13 || i == 15 || i == 46);
+ }
+
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp b/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp
index d55c58a..4fb6cfe 100644
--- a/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp
+++ b/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS, (November 17, 2004)
+Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
This software is a computer program whose purpose is to provide classes
for sequences analysis.
@@ -38,8 +38,6 @@ knowledge of the CeCILL license and that you accept its terms.
*/
#include "YeastMitochondrialGeneticCode.h"
-#include "../Alphabet/YeastMitochondrialCodonAlphabet.h"
-#include "../Alphabet/ProteicAlphabet.h"
using namespace bpp;
@@ -47,124 +45,73 @@ using namespace bpp;
using namespace std;
-YeastMitochondrialGeneticCode::YeastMitochondrialGeneticCode(const NucleicAlphabet * alpha) : GeneticCode()
+YeastMitochondrialGeneticCode::YeastMitochondrialGeneticCode(const NucleicAlphabet* alphabet) :
+ GeneticCode(alphabet)
{
- codonAlphabet_ = new YeastMitochondrialCodonAlphabet(alpha);
- proteicAlphabet_ = new ProteicAlphabet();
-}
-
-YeastMitochondrialGeneticCode::~YeastMitochondrialGeneticCode()
-{
- delete codonAlphabet_;
- delete proteicAlphabet_;
-}
-
-int YeastMitochondrialGeneticCode::translate(int state) const throw (Exception)
-{
- if(state == codonAlphabet_->getUnknownCharacterCode()) return proteicAlphabet_->getUnknownCharacterCode();
- vector<int> positions = codonAlphabet_->getPositions(state);
- switch(positions[0])
- {
- //First position:
- case 0 : //A
- switch(positions[1])
- {
- //Second position:
- case 0 : //AA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("K"); //Lysine
- case 1 : case 3 : return proteicAlphabet_->charToInt("N"); //Asparagine
- }
- case 1 : //AC
- return proteicAlphabet_->charToInt("T"); //Threonine
- case 2 : //AG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("S"); //Serine
- case 1 : case 3 : return proteicAlphabet_->charToInt("S"); //Serine
- }
- case 3 : //AT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2: return proteicAlphabet_->charToInt("M"); //Methionine
- case 1 : case 3 : return proteicAlphabet_->charToInt("I"); //Isoleucine
- }
- }
- case 1 : //C
- switch(positions[1])
- {
- //Second position:
- case 0 : //CA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("Q"); //Glutamine
- case 1 : case 3 : return proteicAlphabet_->charToInt("H"); //Histidine
- }
- case 1 : //CC
- return proteicAlphabet_->charToInt("P"); //Proline
- case 2 : //CG
- return proteicAlphabet_->charToInt("R"); //Arginine
- case 3 : //CT
- return proteicAlphabet_->charToInt("T"); //Threonine
- }
- case 2 : //G
- switch(positions[1])
- {
- //Second position:
- case 0 : //GA
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("E"); //Glutamic acid
- case 1 : case 3 : return proteicAlphabet_->charToInt("D"); //Aspartic acid
- }
- case 1 : //GC
- return proteicAlphabet_->charToInt("A"); //Alanine
- case 2 : //GG
- return proteicAlphabet_->charToInt("G"); //Glycine
- case 3 : //GT
- return proteicAlphabet_->charToInt("V"); //Valine
- }
- case 3 : //T(U)
- switch(positions[1])
- {
- //Second position:
- case 0 : //TA
- switch(positions[2])
- {
- //Third position:
- case 0 : throw StopCodonException("", "TAA"); //Stop codon
- case 2 : throw StopCodonException("", "TAG"); //Stop codon
- case 1 : case 3 : return proteicAlphabet_->charToInt("Y"); //Tyrosine
- }
- case 1 : //TC
- return proteicAlphabet_->charToInt("S"); //Serine
- case 2 : //TG
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("W"); //Tryptophane
- case 1 : case 3 : return proteicAlphabet_->charToInt("C"); //Cysteine
- }
- case 3 : //TT
- switch(positions[2])
- {
- //Third position:
- case 0 : case 2 : return proteicAlphabet_->charToInt("L"); //Leucine
- case 1 : case 3 : return proteicAlphabet_->charToInt("F"); //Phenylalanine
- }
- }
- }
- throw BadIntException(state, "YeastMitochondrialGeneticCode::translate", codonAlphabet_);
-}
-
-string YeastMitochondrialGeneticCode::translate(const string & state) const throw (Exception)
-{
- return proteicAlphabet_->intToChar(translate(codonAlphabet_->charToInt(state)));
+ tlnTable_[0] = 11; //AAA -> K
+ tlnTable_[1] = 2; //AAC -> N
+ tlnTable_[2] = 11; //AAG -> K
+ tlnTable_[3] = 2; //AAT -> N
+ tlnTable_[4] = 16; //ACA -> T
+ tlnTable_[5] = 16; //ACC -> T
+ tlnTable_[6] = 16; //ACG -> T
+ tlnTable_[7] = 16; //ACT -> T
+ tlnTable_[8] = 15; //AGA -> S
+ tlnTable_[9] = 15; //AGC -> S
+ tlnTable_[10] = 15; //AGG -> S
+ tlnTable_[11] = 15; //AGT -> S
+ tlnTable_[12] = 12; //ATA -> M
+ tlnTable_[13] = 9; //ATC -> I
+ tlnTable_[14] = 12; //ATG -> M
+ tlnTable_[15] = 9; //ATT -> I
+ tlnTable_[16] = 5; //CAA -> Q
+ tlnTable_[17] = 8; //CAC -> H
+ tlnTable_[18] = 5; //CAG -> Q
+ tlnTable_[19] = 8; //CAT -> H
+ tlnTable_[20] = 14; //CCA -> P
+ tlnTable_[21] = 14; //CCC -> P
+ tlnTable_[22] = 14; //CCG -> P
+ tlnTable_[23] = 14; //CCT -> P
+ tlnTable_[24] = 1; //CGA -> R
+ tlnTable_[25] = 1; //CGC -> R
+ tlnTable_[26] = 1; //CGG -> R
+ tlnTable_[27] = 1; //CGT -> R
+ tlnTable_[28] = 16; //CTA -> T
+ tlnTable_[29] = 16; //CTC -> T
+ tlnTable_[30] = 16; //CTG -> T
+ tlnTable_[31] = 16; //CTT -> T
+ tlnTable_[32] = 6; //GAA -> E
+ tlnTable_[33] = 3; //GAC -> D
+ tlnTable_[34] = 6; //GAG -> E
+ tlnTable_[35] = 3; //GAT -> D
+ tlnTable_[36] = 0; //GCA -> A
+ tlnTable_[37] = 0; //GCC -> A
+ tlnTable_[38] = 0; //GCG -> A
+ tlnTable_[39] = 0; //GCT -> A
+ tlnTable_[40] = 7; //GGA -> G
+ tlnTable_[41] = 7; //GGC -> G
+ tlnTable_[42] = 7; //GGG -> G
+ tlnTable_[43] = 7; //GGT -> G
+ tlnTable_[44] = 19; //GTA -> V
+ tlnTable_[45] = 19; //GTC -> V
+ tlnTable_[46] = 19; //GTG -> V
+ tlnTable_[47] = 19; //GTT -> V
+ tlnTable_[48] = -99; //TAA -> STOP
+ tlnTable_[49] = 18; //TAC -> Y
+ tlnTable_[50] = -99; //TAG -> STOP
+ tlnTable_[51] = 18; //TAT -> Y
+ tlnTable_[52] = 15; //TCA -> S
+ tlnTable_[53] = 15; //TCC -> S
+ tlnTable_[54] = 15; //TCG -> S
+ tlnTable_[55] = 15; //TCT -> S
+ tlnTable_[56] = 17; //TGA -> W
+ tlnTable_[57] = 4; //TGC -> C
+ tlnTable_[58] = 17; //TGG -> W
+ tlnTable_[59] = 4; //TGT -> C
+ tlnTable_[60] = 10; //TTA -> L
+ tlnTable_[61] = 13; //TTC -> F
+ tlnTable_[62] = 10; //TTG -> L
+ tlnTable_[63] = 13; //TTT -> F
+ tlnTable_[codonAlphabet_.getUnknownCharacterCode()] = proteicAlphabet_.getUnknownCharacterCode();
}
diff --git a/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h b/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h
index 178ff19..5f382a7 100644
--- a/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h
+++ b/src/Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.h
@@ -52,21 +52,58 @@ namespace bpp
* Mitochondrial genetic code as describe on the NCBI website:
* http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t#SG3
*/
-
class YeastMitochondrialGeneticCode:
- public GeneticCode
+ public virtual GeneticCode
{
public:
- YeastMitochondrialGeneticCode(const NucleicAlphabet * alpha);
- virtual ~YeastMitochondrialGeneticCode();
+ YeastMitochondrialGeneticCode(const NucleicAlphabet* alphabet); // alphabet is forwarded to GeneticCode(alphabet)
+
+ virtual ~YeastMitochondrialGeneticCode() {}
- public:
- int translate(int state) const throw (Exception);
- std::string translate(const std::string & state) const throw (Exception);
- Sequence * translate(const Sequence & sequence) const throw (Exception)
- {
- return GeneticCode::translate(sequence);
- }
+ virtual YeastMitochondrialGeneticCode* clone() const {
+ return new YeastMitochondrialGeneticCode(*this);
+ }
+
+ public:
+ size_t getNumberOfStopCodons() const { return 2; } // TAA and TAG; integer literal avoids a double-to-size_t conversion
+
+ std::vector<int> getStopCodonsAsInt() const { // integer codes of the two stop codons
+ std::vector<int> v(2);
+ v[0] = 48; // TAA (was v[1]: indices are 0-based, v[2] was out of bounds)
+ v[1] = 50; // TAG
+ return v;
+ }
+
+ std::vector<std::string> getStopCodonsAsChar() const { // the two stop codons as three-letter strings
+ std::vector<std::string> v(2);
+ v[0] = "TAA"; // was v[1]: indices are 0-based, v[2] was out of bounds
+ v[1] = "TAG";
+ return v;
+ }
+
+ bool isStop(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 48 || state == 50);
+ }
+
+ bool isStop(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 48 || i == 50);
+ }
+
+ bool isAltStart(int state) const throw (BadIntException) {
+ //Test:
+ codonAlphabet_.intToChar(state); //throw exception if invalid state!
+ return (state == 12);
+ }
+
+ bool isAltStart(const std::string& state) const throw (BadCharException) {
+ int i = codonAlphabet_.charToInt(state);
+ return (i == 12);
+ }
+
+
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp b/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp
index 689a9c0..2e812ef 100644
--- a/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp
+++ b/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.cpp
@@ -67,53 +67,42 @@ IAlignment* BppOAlignmentReaderFormat::read(const std::string& description) thro
{
bool sequential = true, extended = true;
string split = " ";
- if (unparsedArguments_.find("order") != unparsedArguments_.end())
- {
- if (unparsedArguments_["order"] == "sequential")
- sequential = true;
- else if (unparsedArguments_["order"] == "interleaved")
- sequential = false;
- else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["order"] +
- "' for argument 'Phylip#order' is unknown. " +
- "Default used instead: sequential.");
- }
+ string order = ApplicationTools::getStringParameter("order", unparsedArguments_, "sequential", "", true, warningLevel_);
+ if (order == "sequential")
+ sequential = true;
+ else if (order == "interleaved")
+ sequential = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#order' not found. Default used instead: sequential.");
- if (unparsedArguments_.find("type") != unparsedArguments_.end())
+ throw Exception("BppOAlignmentReaderFormat::read. Invalid argument 'order' for phylip format: " + order);
+
+ string type = ApplicationTools::getStringParameter("type", unparsedArguments_, "extended", "", true, warningLevel_);
+ if (type == "extended")
{
- if (unparsedArguments_["type"] == "extended")
- {
- extended = true;
- split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false);
- if (split == "spaces")
- split = " ";
- else if (split == "tab")
- split = "\t";
- else
- throw Exception("Unknown option for Phylip#split: " + split);
- }
- else if (unparsedArguments_["type"] == "classic")
- extended = false;
+ extended = true;
+ split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, warningLevel_);
+ if (split == "spaces")
+ split = " ";
+ else if (split == "tab")
+ split = "\t";
else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
- "Default used instead: extended.");
+ throw Exception("BppOAlignmentReaderFormat::read. Invalid argument 'split' for phylip format: " + split);
}
+ else if (type == "classic")
+ extended = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: extended.");
+ throw Exception("BppOAlignmentReaderFormat::read. Invalid argument 'type' for phylip format: " + type);
+
iAln.reset(new Phylip(extended, sequential, 100, true, split));
}
else if (format == "Fasta")
{
- bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, false);
- bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, false);
+ bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, warningLevel_);
+ bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, warningLevel_);
iAln.reset(new Fasta(100, true, extended, strictNames));
}
else if (format == "Clustal")
{
- unsigned int extraSpaces = ApplicationTools::getParameter<unsigned int>("extraSpaces", unparsedArguments_, 0, "", true, false);
+ unsigned int extraSpaces = ApplicationTools::getParameter<unsigned int>("extraSpaces", unparsedArguments_, 0, "", true, warningLevel_);
iAln.reset(new Clustal(true, extraSpaces));
}
else if (format == "Dcse")
diff --git a/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h b/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h
index 05667a5..21d7ea9 100644
--- a/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h
+++ b/src/Bpp/Seq/Io/BppOAlignmentReaderFormat.h
@@ -57,13 +57,12 @@ namespace bpp
public virtual IOFormat
{
private:
- bool verbose_;
std::map<std::string, std::string> unparsedArguments_;
+ int warningLevel_;
public:
- BppOAlignmentReaderFormat(bool verbose = true):
- verbose_(verbose),
- unparsedArguments_() {}
+ BppOAlignmentReaderFormat(int warningLevel):
+ unparsedArguments_(), warningLevel_(warningLevel) {}
virtual ~BppOAlignmentReaderFormat() {}
public:
diff --git a/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp b/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp
index 035940d..05268ae 100644
--- a/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp
+++ b/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.cpp
@@ -57,7 +57,7 @@ OAlignment* BppOAlignmentWriterFormat::read(const std::string& description) thro
unparsedArguments_.clear();
string format = "";
KeyvalTools::parseProcedure(description, format, unparsedArguments_);
- unsigned int ncol = ApplicationTools::getParameter<unsigned int>("length", unparsedArguments_, 100, "", true, false);
+ unsigned int ncol = ApplicationTools::getParameter<unsigned int>("length", unparsedArguments_, 100, "", true, warningLevel_);
auto_ptr<OAlignment> oAln;
if (format == "Fasta")
{
@@ -75,47 +75,36 @@ OAlignment* BppOAlignmentWriterFormat::read(const std::string& description) thro
{
bool sequential = true, extended = true;
string split = " ";
- if (unparsedArguments_.find("order") != unparsedArguments_.end())
- {
- if (unparsedArguments_["order"] == "sequential")
- sequential = true;
- else if (unparsedArguments_["order"] == "interleaved")
- sequential = false;
- else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["order"] +
- "' for argument 'Phylip#order' is unknown. " +
- "Default used instead: sequential.");
- }
+ string order = ApplicationTools::getStringParameter("order", unparsedArguments_, "sequential", "", true, warningLevel_);
+ if (order == "sequential")
+ sequential = true;
+ else if (order == "interleaved")
+ sequential = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#order' not found. Default used instead: sequential.");
- if (unparsedArguments_.find("type") != unparsedArguments_.end())
+ throw Exception("BppOAlignmentWriterFormat::read. Invalid argument 'order' for phylip format: " + order);
+
+ string type = ApplicationTools::getStringParameter("type", unparsedArguments_, "extended", "", true, warningLevel_);
+ if (type == "extended")
{
- if (unparsedArguments_["type"] == "extended")
- {
- extended = true;
- split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false);
- if (split == "spaces")
- split = " ";
- else if (split == "tab")
- split = "\t";
- else
- throw Exception("Unknown option for Phylip#split: " + split);
- }
- else if (unparsedArguments_["type"] == "classic")
- extended = false;
+ extended = true;
+ split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, warningLevel_);
+ if (split == "spaces")
+ split = " ";
+ else if (split == "tab")
+ split = "\t";
else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
- "Default used instead: extended.");
+ throw Exception("BppOAlignmentWriterFormat::read. Invalid argument 'split' for phylip format: " + split);
}
+ else if (type == "classic")
+ extended = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: extended.");
- oAln.reset(new Phylip(extended, sequential, ncol, true, split));
+ throw Exception("BppOAlignmentWriterFormat::read. Invalid argument 'type' for phylip format: " + type);
+
+ oAln.reset(new Phylip(extended, sequential, ncol, true, split));
}
else if (format == "Stockholm")
{
- oAln.reset(reinterpret_cast<OAlignment*>(new Stockholm()));
+ oAln.reset(new Stockholm());
}
else
{
diff --git a/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h b/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h
index a27451b..77a3a67 100644
--- a/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h
+++ b/src/Bpp/Seq/Io/BppOAlignmentWriterFormat.h
@@ -57,13 +57,14 @@ namespace bpp
public virtual IOFormat
{
private:
- bool verbose_;
std::map<std::string, std::string> unparsedArguments_;
+ int warningLevel_;
public:
- BppOAlignmentWriterFormat(bool verbose = true):
- verbose_(verbose),
- unparsedArguments_() {}
+ BppOAlignmentWriterFormat(int warningLevel):
+ unparsedArguments_(),
+ warningLevel_(warningLevel) {}
+
virtual ~BppOAlignmentWriterFormat() {}
public:
diff --git a/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp b/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp
index 2598dd3..ad8fda0 100644
--- a/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp
+++ b/src/Bpp/Seq/Io/BppOAlphabetIndex1Format.cpp
@@ -150,6 +150,5 @@ AlphabetIndex1* BppOAlphabetIndex1Format::read(const std::string& description) t
{
return 0;
}
-
}
diff --git a/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp b/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp
index 6a17c86..a1224fd 100644
--- a/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp
+++ b/src/Bpp/Seq/Io/BppOAlphabetIndex2Format.cpp
@@ -75,7 +75,7 @@ AlphabetIndex2* BppOAlphabetIndex2Format::read(const std::string& description) t
}
else if (name == "Grantham")
{
- bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true);
+ bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true, 1);
GranthamAAChemicalDistance* M = new GranthamAAChemicalDistance();
M->setSymmetric(sym);
if (!sym) M->setPC1Sign(true);
@@ -83,14 +83,14 @@ AlphabetIndex2* BppOAlphabetIndex2Format::read(const std::string& description) t
}
else if (name == "Miyata")
{
- bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true);
+ bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true, 1);
MiyataAAChemicalDistance* M = new MiyataAAChemicalDistance();
M->setSymmetric(sym);
return M;
}
else if (name == "Diff")
{
- string index1Desc = ApplicationTools::getStringParameter("index1", args, "None", "", true);
+ string index1Desc = ApplicationTools::getStringParameter("index1", args, "None", "", true, 1);
bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true);
BppOAlphabetIndex1Format index1Reader(alphabet_, "" , false);
AlphabetIndex1* index1 = index1Reader.read(index1Desc);
@@ -104,7 +104,7 @@ AlphabetIndex2* BppOAlphabetIndex2Format::read(const std::string& description) t
}
else if (name == "User")
{
- bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true);
+ bool sym = ApplicationTools::getBooleanParameter("symmetrical", args, true, "", true, 1);
string aax2FilePath = ApplicationTools::getAFilePath("file", args, true, true, "", false);
ifstream aax2File(aax2FilePath.c_str(), ios::in);
AAIndex2Entry* M = new AAIndex2Entry(aax2File, sym);
@@ -120,6 +120,5 @@ AlphabetIndex2* BppOAlphabetIndex2Format::read(const std::string& description) t
{
return 0;
}
-
}
diff --git a/src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp b/src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp
index d1bc2d1..1d6c3a9 100644
--- a/src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp
+++ b/src/Bpp/Seq/Io/BppOSequenceReaderFormat.cpp
@@ -68,53 +68,42 @@ ISequence* BppOSequenceReaderFormat::read(const std::string& description) throw
{
bool sequential = true, extended = true;
string split = " ";
- if (unparsedArguments_.find("order") != unparsedArguments_.end())
- {
- if (unparsedArguments_["order"] == "sequential")
- sequential = true;
- else if (unparsedArguments_["order"] == "interleaved")
- sequential = false;
- else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["order"] +
- "' for argument 'Phylip#order' is unknown. " +
- "Default used instead: sequential.");
- }
+ string order = ApplicationTools::getStringParameter("order", unparsedArguments_, "sequential", "", true, warningLevel_);
+ if (order == "sequential")
+ sequential = true;
+ else if (order == "interleaved")
+ sequential = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#order' not found. Default used instead: sequential.");
- if (unparsedArguments_.find("type") != unparsedArguments_.end())
+ throw Exception("BppOSequenceReaderFormat::read. Invalid argument 'order' for phylip format: " + order);
+
+ string type = ApplicationTools::getStringParameter("type", unparsedArguments_, "extended", "", true, warningLevel_);
+ if (type == "extended")
{
- if (unparsedArguments_["type"] == "extended")
- {
- extended = true;
- split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, false);
- if (split == "spaces")
- split = " ";
- else if (split == "tab")
- split = "\t";
- else
- throw Exception("Unknown option for Phylip#split: " + split);
- }
- else if (unparsedArguments_["type"] == "classic")
- extended = false;
+ extended = true;
+ split = ApplicationTools::getStringParameter("split", unparsedArguments_, "spaces", "", true, warningLevel_);
+ if (split == "spaces")
+ split = " ";
+ else if (split == "tab")
+ split = "\t";
else
- ApplicationTools::displayWarning("Argument '" +
- unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
- "Default used instead: extended.");
+ throw Exception("BppOSequenceReaderFormat::read. Invalid argument 'split' for phylip format: " + split);
}
+ else if (type == "classic")
+ extended = false;
else
- ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: extended.");
+ throw Exception("BppOSequenceReaderFormat::read. Invalid argument 'type' for phylip format: " + type);
+
iSeq.reset(new Phylip(extended, sequential, 100, true, split));
}
else if (format == "Fasta")
{
- bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, false);
- bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, false);
+ bool strictNames = ApplicationTools::getBooleanParameter("strict_names", unparsedArguments_, false, "", true, warningLevel_);
+ bool extended = ApplicationTools::getBooleanParameter("extended", unparsedArguments_, false, "", true, warningLevel_);
iSeq.reset(new Fasta(100, true, extended, strictNames));
}
else if (format == "Clustal")
{
- unsigned int extraSpaces = ApplicationTools::getParameter<unsigned int>("extraSpaces", unparsedArguments_, 0, "", true, false);
+ unsigned int extraSpaces = ApplicationTools::getParameter<unsigned int>("extraSpaces", unparsedArguments_, 0, "", true, warningLevel_);
iSeq.reset(new Clustal(true, extraSpaces));
}
else if (format == "Dcse")
@@ -123,7 +112,7 @@ ISequence* BppOSequenceReaderFormat::read(const std::string& description) throw
}
else if (format == "GenBank")
{
- iSeq.reset(reinterpret_cast<ISequence*>(new GenBank())); // This is required to remove a strict-aliasing warning in gcc 4.4
+ iSeq.reset(new GenBank()); // This is required to remove a strict-aliasing warning in gcc 4.4
}
else if (format == "Nexus")
{
diff --git a/src/Bpp/Seq/Io/BppOSequenceReaderFormat.h b/src/Bpp/Seq/Io/BppOSequenceReaderFormat.h
index 7d11267..3b3edd5 100644
--- a/src/Bpp/Seq/Io/BppOSequenceReaderFormat.h
+++ b/src/Bpp/Seq/Io/BppOSequenceReaderFormat.h
@@ -57,13 +57,13 @@ namespace bpp
public virtual IOFormat
{
private:
- bool verbose_;
std::map<std::string, std::string> unparsedArguments_;
+ int warningLevel_;
public:
- BppOSequenceReaderFormat(bool verbose = true):
- verbose_(verbose),
- unparsedArguments_() {}
+ BppOSequenceReaderFormat(int warningLevel):
+ unparsedArguments_(), warningLevel_(warningLevel) {}
+
virtual ~BppOSequenceReaderFormat() {}
public:
diff --git a/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h b/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h
index 7039176..5b1af48 100644
--- a/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h
+++ b/src/Bpp/Seq/Io/BppOSequenceStreamReaderFormat.h
@@ -52,18 +52,15 @@ namespace bpp
* Creates a new ISequenceStream object according to
* distribution description syntax (see the Bio++ Program Suite
* manual for a detailed description of this syntax).
- *
*/
class BppOSequenceStreamReaderFormat:
public virtual IOFormat
{
private:
- bool verbose_;
std::map<std::string, std::string> unparsedArguments_;
public:
- BppOSequenceStreamReaderFormat(bool verbose = true):
- verbose_(verbose),
+ BppOSequenceStreamReaderFormat():
unparsedArguments_() {}
virtual ~BppOSequenceStreamReaderFormat() {}
diff --git a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp b/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp
index eb1c39e..7685612 100644
--- a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp
+++ b/src/Bpp/Seq/Io/BppOSequenceWriterFormat.cpp
@@ -54,7 +54,7 @@ OSequence* BppOSequenceWriterFormat::read(const std::string& description) throw
unparsedArguments_.clear();
string format = "";
KeyvalTools::parseProcedure(description, format, unparsedArguments_);
- unsigned int ncol = ApplicationTools::getParameter<unsigned int>("length", unparsedArguments_, 100, "", true, false);
+ unsigned int ncol = ApplicationTools::getParameter<unsigned int>("length", unparsedArguments_, 100, "", true, warningLevel_);
auto_ptr<OSequence> oSeq;
if (format == "Fasta")
{
diff --git a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.h b/src/Bpp/Seq/Io/BppOSequenceWriterFormat.h
index cc5f6fc..69fed19 100644
--- a/src/Bpp/Seq/Io/BppOSequenceWriterFormat.h
+++ b/src/Bpp/Seq/Io/BppOSequenceWriterFormat.h
@@ -57,13 +57,12 @@ namespace bpp
public virtual IOFormat
{
private:
- bool verbose_;
std::map<std::string, std::string> unparsedArguments_;
+ int warningLevel_;
public:
- BppOSequenceWriterFormat(bool verbose = true):
- verbose_(verbose),
- unparsedArguments_() {}
+ BppOSequenceWriterFormat(int warningLevel):
+ unparsedArguments_(), warningLevel_(warningLevel) {}
virtual ~BppOSequenceWriterFormat() {}
public:
diff --git a/src/Bpp/Seq/Io/Dcse.cpp b/src/Bpp/Seq/Io/Dcse.cpp
index 7711850..8729deb 100644
--- a/src/Bpp/Seq/Io/Dcse.cpp
+++ b/src/Bpp/Seq/Io/Dcse.cpp
@@ -74,7 +74,7 @@ void DCSE::appendAlignmentFromStream(istream& input, SiteContainer& sc) const th
if(line == "") break;
string::size_type endOfSeq = line.find(" ");
if(endOfSeq == line.npos) break;
- sequence = string(line.begin(), line.begin() + endOfSeq);
+ sequence = string(line.begin(), line.begin() + static_cast<ptrdiff_t>(endOfSeq));
sequence = TextTools::removeWhiteSpaces(sequence);
sequence = TextTools::removeChar(sequence, '{');
sequence = TextTools::removeChar(sequence, '}');
@@ -83,7 +83,7 @@ void DCSE::appendAlignmentFromStream(istream& input, SiteContainer& sc) const th
sequence = TextTools::removeChar(sequence, '(');
sequence = TextTools::removeChar(sequence, ')');
sequence = TextTools::removeChar(sequence, '^');
- name = string(line.begin() + endOfSeq + 1, line.end()),
+ name = string(line.begin() + static_cast<ptrdiff_t>(endOfSeq + 1), line.end()),
name = TextTools::removeFirstWhiteSpaces(name);
if(name.find("Helix numbering") == name.npos
&& name.find("mask") == name.npos)
diff --git a/src/Bpp/Seq/Io/Fasta.cpp b/src/Bpp/Seq/Io/Fasta.cpp
index ca308bb..e990910 100644
--- a/src/Bpp/Seq/Io/Fasta.cpp
+++ b/src/Bpp/Seq/Io/Fasta.cpp
@@ -130,15 +130,16 @@ void Fasta::writeSequence(ostream& output, const Sequence& seq) const throw (Exc
output << endl;
// Sequence content
string buffer; // use a buffer to format sequence with states > 1 char
- for (unsigned int i = 0 ; i < seq.size() ; i++)
+ for (size_t i = 0 ; i < seq.size() ; ++i)
{
buffer += seq.getChar(i);
- if (buffer.size() >= charsByLine_ || i + 1 == seq.size())
+ if (buffer.size() >= charsByLine_)
{
- output << string(buffer.begin(), buffer.begin() + charsByLine_ < buffer.end() ? buffer.begin() + charsByLine_ : buffer.end()) << endl;
+ output << string(buffer.begin(), buffer.begin() + charsByLine_) << endl;
buffer.erase(0, charsByLine_);
}
}
+ if (!buffer.empty()) output << string(buffer.begin(), buffer.end()) << endl; // flush the remainder; skip when empty so no blank line is written
}
/******************************************************************************/
@@ -217,7 +218,7 @@ void Fasta::writeSequences(ostream& output, const SequenceContainer& sc) const t
// Main loop : for all sequences in vector container
vector<string> names = sc.getSequencesNames();
- for (unsigned int i = 0; i < names.size(); i ++)
+ for (size_t i = 0; i < names.size(); ++i)
{
writeSequence(output, sc.getSequence(names[i]));
}
diff --git a/src/Bpp/Seq/Io/Mase.cpp b/src/Bpp/Seq/Io/Mase.cpp
index d09f0f7..8014b89 100644
--- a/src/Bpp/Seq/Io/Mase.cpp
+++ b/src/Bpp/Seq/Io/Mase.cpp
@@ -194,7 +194,7 @@ void Mase::readHeader_(std::istream& input, MaseHeader& header) const throw (Exc
//Site selection:
string::size_type index = line.find("# of");
if (index < line.npos) {
- StringTokenizer st(string(line.begin() + index + 4, line.end()), " \t=;");
+ StringTokenizer st(string(line.begin() + static_cast<ptrdiff_t>(index + 4), line.end()), " \t=;");
st.nextToken(); //skip next word: may be 'regions' or 'segments' or else ;-)
unsigned int numberOfSegments = TextTools::to<unsigned int>(st.nextToken());
string name = st.unparseRemainingTokens();
diff --git a/src/Bpp/Seq/Io/MaseTools.cpp b/src/Bpp/Seq/Io/MaseTools.cpp
index d42c22f..5b4b060 100644
--- a/src/Bpp/Seq/Io/MaseTools.cpp
+++ b/src/Bpp/Seq/Io/MaseTools.cpp
@@ -58,7 +58,7 @@ SiteSelection MaseTools::getSiteSet(const Comments& maseFileHeader, const string
string::size_type index = current.find("# of");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t=;");
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 4), current.end()), " \t=;");
st.nextToken(); // skip next word: may be 'regions' or 'segments' or else ;-)
size_t numberOfSegments = TextTools::to<size_t>(st.nextToken());
string name = st.unparseRemainingTokens();
@@ -111,7 +111,7 @@ SequenceSelection MaseTools::getSequenceSet(const Comments& maseFileHeader, cons
string::size_type index = current.find("@ of");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t=;");
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 4), current.end()), " \t=;");
st.nextToken(); // skip next word: may be 'sequences' or else ;-)
size_t numberOfSequences = TextTools::to<size_t>(st.nextToken());
string name = st.unparseRemainingTokens();
@@ -130,7 +130,7 @@ SequenceSelection MaseTools::getSequenceSet(const Comments& maseFileHeader, cons
int seqIndex = TextTools::toInt(st2.nextToken());
// WARNING!!! In the mase+ format, sequences are numbered from 1 to nbSequences,
// Whereas in SequenceContainer the index begins at 0.
- selection.push_back(seqIndex - 1); // bounds included.
+ selection.push_back(static_cast<size_t>(seqIndex - 1)); // bounds included.
counter++;
if (counter == numberOfSequences)
return selection;
@@ -154,29 +154,29 @@ SiteContainer* MaseTools::getSelectedSites(
{
SiteSelection ss = getSiteSet(sequences.getGeneralComments(), setName);
// We need to convert positions in case of word alphabet:
- size_t wsize = sequences.getAlphabet()->getStateCodingSize();
- if (wsize > 1)
- {
- if (ss.size() % wsize != 0)
- throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
- SiteSelection ss2;
- for (size_t i = 0; i < ss.size(); i += wsize)
- {
- if (ss[i] % wsize != 0)
- throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
- for (size_t j = 1; j < wsize; ++j)
- {
- if (ss[i + j] != (ss[i + j - 1] + 1))
- throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
- }
- ss2.push_back(ss[i] / wsize);
- }
- return SiteContainerTools::getSelectedSites(sequences, ss2);
- }
- else
- {
- return SiteContainerTools::getSelectedSites(sequences, ss);
- }
+ // size_t wsize = sequences.getAlphabet()->getStateCodingSize();
+ // if (wsize > 1)
+ // {
+ // if (ss.size() % wsize != 0)
+ // throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
+ // SiteSelection ss2;
+ // for (size_t i = 0; i < ss.size(); i += wsize)
+ // {
+ // if (ss[i] % wsize != 0)
+ // throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
+ // for (size_t j = 1; j < wsize; ++j)
+ // {
+ // if (ss[i + j] != (ss[i + j - 1] + 1))
+ // throw IOException("MaseTools::getSelectedSites: Site selection is not compatible with the alphabet in use in the container.");
+ // }
+ // ss2.push_back(ss[i] / wsize);
+ // }
+ return SiteContainerTools::getSelectedPositions(sequences, ss);
+ // }
+ // else
+ // {
+ // return SiteContainerTools::getSelectedSites(sequences, ss);
+ // }
}
/******************************************************************************/
@@ -203,9 +203,9 @@ map<string, size_t> MaseTools::getAvailableSiteSelections(const Comments& maseHe
string::size_type index = current.find("# of");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t\n\f\r=;");
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 4), current.end()), " \t\n\f\r=;");
st.nextToken(); // skip next word: may be 'sequences' or else ;-)
- size_t numberOfSegments = TextTools::toInt(st.nextToken());
+ size_t numberOfSegments = TextTools::to<size_t>(st.nextToken());
string name = st.nextToken();
while (st.hasMoreToken())
{
@@ -222,8 +222,8 @@ map<string, size_t> MaseTools::getAvailableSiteSelections(const Comments& maseHe
while (st2.hasMoreToken())
{
StringTokenizer st3(st2.nextToken(), ",");
- size_t begin = TextTools::toInt(st3.nextToken());
- size_t end = TextTools::toInt(st3.nextToken());
+ size_t begin = TextTools::to<size_t>(st3.nextToken());
+ size_t end = TextTools::to<size_t>(st3.nextToken());
counter++;
nbSites += end - begin + 1;
}
@@ -250,7 +250,7 @@ map<string, size_t> MaseTools::getAvailableSequenceSelections(const Comments& ma
string::size_type index = current.find("@ of");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 4, current.end()), " \t\n\f\r=;");
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 4), current.end()), " \t\n\f\r=;");
st.nextToken(); // skip next word: may be 'sequences' or else ;-)
size_t numberOfSequences = TextTools::fromString<size_t>(st.nextToken());
string name = st.nextToken();
@@ -277,7 +277,7 @@ size_t MaseTools::getPhase(const Comments& maseFileHeader, const string& setName
index = current.find("# of");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 12, current.end()), " \t\n\f\r=;");
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 12), current.end()), " \t\n\f\r=;");
// size_t numberOfSegments = TextTools::toInt(st.nextToken());
// cout << "Number of regions: " << st.nextToken() << endl;
string name;
@@ -295,8 +295,8 @@ size_t MaseTools::getPhase(const Comments& maseFileHeader, const string& setName
index = current.find("/codon_start");
if (index < current.npos)
{
- StringTokenizer st(string(current.begin() + index + 12, current.end()), " \t\n\f\r=;");
- phase = TextTools::toInt(st.nextToken());
+ StringTokenizer st(string(current.begin() + static_cast<ptrdiff_t>(index + 12), current.end()), " \t\n\f\r=;");
+ phase = TextTools::to<size_t>(st.nextToken());
}
}
throw Exception("PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing.");
diff --git a/src/Bpp/Seq/Io/MaseTools.h b/src/Bpp/Seq/Io/MaseTools.h
index 2206fd4..45c25b0 100644
--- a/src/Bpp/Seq/Io/MaseTools.h
+++ b/src/Bpp/Seq/Io/MaseTools.h
@@ -5,36 +5,36 @@
//
/*
-Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
-
-This software is a computer program whose purpose is to provide classes
-for sequences analysis.
-
-This software is governed by the CeCILL license under French law and
-abiding by the rules of distribution of free software. You can use,
-modify and/ or redistribute the software under the terms of the CeCILL
-license as circulated by CEA, CNRS and INRIA at the following URL
-"http://www.cecill.info".
-
-As a counterpart to the access to the source code and rights to copy,
-modify and redistribute granted by the license, users are provided only
-with a limited warranty and the software's author, the holder of the
-economic rights, and the successive licensors have only limited
-liability.
-
-In this respect, the user's attention is drawn to the risks associated
-with loading, using, modifying and/or developing or reproducing the
-software by the user in light of its specific status of free software,
-that may mean that it is complicated to manipulate, and that also
-therefore means that it is reserved for developers and experienced
-professionals having in-depth computer knowledge. Users are therefore
-encouraged to load and test the software's suitability as regards their
-requirements in conditions enabling the security of their systems and/or
-data to be ensured and, more generally, to use and operate it in the
-same conditions as regards security.
-
-The fact that you are presently reading this means that you have had
-knowledge of the CeCILL license and that you accept its terms.
+ Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
+
+ This software is a computer program whose purpose is to provide classes
+ for sequences analysis.
+
+ This software is governed by the CeCILL license under French law and
+ abiding by the rules of distribution of free software. You can use,
+ modify and/ or redistribute the software under the terms of the CeCILL
+ license as circulated by CEA, CNRS and INRIA at the following URL
+ "http://www.cecill.info".
+
+ As a counterpart to the access to the source code and rights to copy,
+ modify and redistribute granted by the license, users are provided only
+ with a limited warranty and the software's author, the holder of the
+ economic rights, and the successive licensors have only limited
+ liability.
+
+ In this respect, the user's attention is drawn to the risks associated
+ with loading, using, modifying and/or developing or reproducing the
+ software by the user in light of its specific status of free software,
+ that may mean that it is complicated to manipulate, and that also
+ therefore means that it is reserved for developers and experienced
+ professionals having in-depth computer knowledge. Users are therefore
+ encouraged to load and test the software's suitability as regards their
+ requirements in conditions enabling the security of their systems and/or
+ data to be ensured and, more generally, to use and operate it in the
+ same conditions as regards security.
+
+ The fact that you are presently reading this means that you have had
+ knowledge of the CeCILL license and that you accept its terms.
*/
#ifndef _MASETOOLS_H_
@@ -60,9 +60,9 @@ namespace bpp
* Most of the methods here hence work on the general comments associated
* to a container.
*/
-class MaseTools
-{
- public:
+ class MaseTools
+ {
+ public:
/**
* @brief Get a site selection from a Mase+ header file.
@@ -71,7 +71,7 @@ class MaseTools
* @param setName The name of the set to retrieve.
* @throw IOException If the specified set is not found.
*/
- static SiteSelection getSiteSet(const Comments& maseFileHeader, const std::string& setName) throw (IOException);
+ static SiteSelection getSiteSet(const Comments& maseFileHeader, const std::string& setName) throw (IOException);
/**
* @brief Get a sequence selection from a Mase+ header file.
@@ -124,21 +124,21 @@ class MaseTools
* @param maseHeader Comments as described in the Mase+ format specification.
* @return A vector of selection names.
*/
- static std::map<std::string, size_t> getAvailableSequenceSelections(const Comments & maseHeader);
-
- /**
- * @brief Get the phase of a given coding region from a mase+ header.
- *
- * Look for a /codon_start tag with a phase indice and a site selection with name setName.
- *
- * @param maseFileHeader Comments in Mase+ format.
- * @param setName a cds site selection name.
- * @return 1,2 or 3.
- * @throw Exception If no corresponding tag found in file.
- */
- static size_t getPhase(const Comments & maseFileHeader, const std::string &setName) throw (Exception);
-
-};
+ static std::map<std::string, size_t> getAvailableSequenceSelections(const Comments & maseHeader);
+
+ /**
+ * @brief Get the phase of a given coding region from a mase+ header.
+ *
+ * Look for a /codon_start tag with a phase indice and a site selection with name setName.
+ *
+ * @param maseFileHeader Comments in Mase+ format.
+ * @param setName a cds site selection name.
+ * @return 1,2 or 3.
+ * @throw Exception If no corresponding tag found in file.
+ */
+ static size_t getPhase(const Comments & maseFileHeader, const std::string &setName) throw (Exception);
+
+ };
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Io/Phylip.cpp b/src/Bpp/Seq/Io/Phylip.cpp
index 378335f..ff6f42d 100644
--- a/src/Bpp/Seq/Io/Phylip.cpp
+++ b/src/Bpp/Seq/Io/Phylip.cpp
@@ -190,7 +190,7 @@ unsigned int Phylip::getNumberOfSequences(const std::string& path) const throw (
string firstLine = FileTools::getNextLine(file);
StringTokenizer st(firstLine, " \t");
istringstream iss(st.nextToken());
- int nb;
+ unsigned int nb;
iss >> nb;
file.close();
return nb;
@@ -223,7 +223,7 @@ std::vector<std::string> Phylip::getSizedNames(const std::vector<std::string>& n
/******************************************************************************/
-void Phylip::writeSequential(std::ostream& out, const SequenceContainer& sc, int charsByLine) const
+void Phylip::writeSequential(std::ostream& out, const SequenceContainer& sc) const
{
//cout << "Write sequential" << endl;
size_t numberOfSites = sc.getSequence(sc.getSequencesNames()[0]).size() * sc.getAlphabet()->getStateCodingSize();
@@ -231,11 +231,11 @@ void Phylip::writeSequential(std::ostream& out, const SequenceContainer& sc, int
vector<string> seqNames = sc.getSequencesNames();
vector<string> names = getSizedNames(seqNames);
- for (size_t i = 0; i < seqNames.size(); i++)
+ for (size_t i = 0; i < seqNames.size(); ++i)
{
- vector<string> seq = TextTools::split(sc.toString(seqNames[i]), charsByLine);
+ vector<string> seq = TextTools::split(sc.toString(seqNames[i]), charsByLine_);
out << names[i] << seq[0] << endl;
- for (unsigned int j = 1; j < seq.size(); j++)
+ for (size_t j = 1; j < seq.size(); ++j)
{
out << string(names[i].size(), ' ') << seq[j] << endl;
}
@@ -243,7 +243,7 @@ void Phylip::writeSequential(std::ostream& out, const SequenceContainer& sc, int
}
}
-void Phylip::writeInterleaved(std::ostream& out, const SequenceContainer& sc, int charsByLine) const
+void Phylip::writeInterleaved(std::ostream& out, const SequenceContainer& sc) const
{
//cout << "Write interleaved;" << endl;
size_t numberOfSites = sc.getSequence(sc.getSequencesNames()[0]).size() * sc.getAlphabet()->getStateCodingSize();
@@ -253,20 +253,20 @@ void Phylip::writeInterleaved(std::ostream& out, const SequenceContainer& sc, in
vector<string> names = getSizedNames(seqNames);
//Split sequences:
vector< vector<string> > seqs(sc.getNumberOfSequences());
- for (size_t i = 0; i < seqNames.size(); i++)
+ for (size_t i = 0; i < seqNames.size(); ++i)
{
- seqs[i] = TextTools::split(sc.toString(seqNames[i]), charsByLine);
+ seqs[i] = TextTools::split(sc.toString(seqNames[i]), charsByLine_);
}
//Write first block:
- for (size_t i = 0; i < names.size(); i++)
+ for (size_t i = 0; i < names.size(); ++i)
{
out << names[i] << seqs[i][0] << endl;
}
out << endl;
//Write other blocks:
- for (size_t j = 1; j < seqs[0].size(); j++)
+ for (size_t j = 1; j < seqs[0].size(); ++j)
{
- for (unsigned int i = 0; i < sc.getNumberOfSequences(); i++)
+ for (size_t i = 0; i < sc.getNumberOfSequences(); ++i)
{
out << seqs[i][j] << endl;
}
@@ -285,8 +285,8 @@ void Phylip::writeAlignment(std::ostream& output, const SiteContainer& sc) const
// Checking the existence of specified file, and possibility to open it in write mode
if (!output) { throw IOException ("Phylip::write : failed to open file"); }
- if (sequential_) writeSequential (output, sc, charsByLine_);
- else writeInterleaved(output, sc, charsByLine_);
+ if (sequential_) writeSequential (output, sc);
+ else writeInterleaved(output, sc);
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/Io/Phylip.h b/src/Bpp/Seq/Io/Phylip.h
index a7604e0..5d8646c 100644
--- a/src/Bpp/Seq/Io/Phylip.h
+++ b/src/Bpp/Seq/Io/Phylip.h
@@ -186,8 +186,8 @@ class Phylip :
void readInterleaved(std::istream& in, SiteContainer& asc) const throw (Exception);
//Writing tools:
std::vector<std::string> getSizedNames(const std::vector<std::string>& names) const;
- void writeSequential (std::ostream& out, const SequenceContainer& sc, int charsByLine) const;
- void writeInterleaved(std::ostream& out, const SequenceContainer& sc, int charsByLine) const;
+ void writeSequential(std::ostream& out, const SequenceContainer& sc) const;
+ void writeInterleaved(std::ostream& out, const SequenceContainer& sc) const;
};
} //end of namespace bpp.
diff --git a/src/Bpp/Seq/Sequence.cpp b/src/Bpp/Seq/Sequence.cpp
index ac05354..c0ff88b 100644
--- a/src/Bpp/Seq/Sequence.cpp
+++ b/src/Bpp/Seq/Sequence.cpp
@@ -179,7 +179,7 @@ void BasicSequence::setToSizeL(size_t newSize)
{
//We must truncate sequence from the left.
//This is a very inefficient method!
- content_.erase(content_.begin(), content_.begin() + (seqSize - newSize));
+ content_.erase(content_.begin(), content_.begin() + static_cast<ptrdiff_t>(seqSize - newSize));
return;
}
diff --git a/src/Bpp/Seq/SequencePositionIterators.cpp b/src/Bpp/Seq/SequencePositionIterators.cpp
index 4c20ebe..221eb29 100644
--- a/src/Bpp/Seq/SequencePositionIterators.cpp
+++ b/src/Bpp/Seq/SequencePositionIterators.cpp
@@ -42,18 +42,15 @@ knowledge of the CeCILL license and that you accept its terms.
using namespace bpp;
using namespace std; // for the STL
-//=================================
-// AbstractSequencePositionIterator
-//=================================
/******************************************************************************/
-bool AbstractSequencePositionIterator::operator==(const SequencePositionIterator & it) const {
+bool AbstractSequencePositionIterator::operator==(const SequencePositionIterator& it) const {
return this->getPosition() == it.getPosition();
}
/******************************************************************************/
-bool AbstractSequencePositionIterator::operator!=(const SequencePositionIterator & it) const {
+bool AbstractSequencePositionIterator::operator!=(const SequencePositionIterator& it) const {
return this->getPosition() != it.getPosition();
}
@@ -98,7 +95,7 @@ SimpleSequencePositionIterator::SimpleSequencePositionIterator(const SequencePos
/******************************************************************************/
-SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator++() {
+SimpleSequencePositionIterator& SimpleSequencePositionIterator::operator++() {
this->setPosition(this->getPosition() + 1);
return *this;
}
@@ -106,28 +103,36 @@ SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator++() {
/******************************************************************************/
SimpleSequencePositionIterator SimpleSequencePositionIterator::operator++(int i) {
- SimpleSequencePositionIterator ans = * this;
- ++(* this);
+ SimpleSequencePositionIterator ans = *this;
+ ++(*this);
return ans;
}
/******************************************************************************/
-SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator+=(int i) {
- this->setPosition(this->getPosition() + i);
- return * this;
+SimpleSequencePositionIterator& SimpleSequencePositionIterator::operator+=(int i) {
+ if (i > 0)
+ this->setPosition(this->getPosition() + static_cast<unsigned int>(i));
+ else if (i < 0) {
+ unsigned int d = static_cast<unsigned int>(-i);
+ if (d > this->getPosition())
+ throw Exception("SimpleSequencePositionIterator::operator+=. Negative increment too large.");
+ else
+ this->setPosition(this->getPosition() - d);
+ }
+ return *this;
}
/******************************************************************************/
-SimpleSequencePositionIterator & SimpleSequencePositionIterator::operator-=(int i) {
- return (* this) += -i;
+SimpleSequencePositionIterator& SimpleSequencePositionIterator::operator-=(int i) {
+ return (*this) += -i;
}
/******************************************************************************/
SimpleSequencePositionIterator SimpleSequencePositionIterator::operator+(int i) const {
- SimpleSequencePositionIterator res(* this);
+ SimpleSequencePositionIterator res(*this);
res += i;
return res;
}
@@ -135,7 +140,7 @@ SimpleSequencePositionIterator SimpleSequencePositionIterator::operator+(int i)
/******************************************************************************/
SimpleSequencePositionIterator SimpleSequencePositionIterator::operator-(int i) const {
- return (* this) + (- i);
+ return (*this) + (- i);
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/SequenceTools.cpp b/src/Bpp/Seq/SequenceTools.cpp
index f4d5ce7..60b852c 100644
--- a/src/Bpp/Seq/SequenceTools.cpp
+++ b/src/Bpp/Seq/SequenceTools.cpp
@@ -77,8 +77,8 @@ Sequence* SequenceTools::subseq(const Sequence& sequence, size_t begin, size_t e
vector<int> temp(sequence.getContent());
// Truncate sequence
- temp.erase(temp.begin() + end + 1, temp.end());
- temp.erase(temp.begin(), temp.begin() + begin);
+ temp.erase(temp.begin() + static_cast<ptrdiff_t>(end + 1), temp.end());
+ temp.erase(temp.begin(), temp.begin() + static_cast<ptrdiff_t>(begin));
// New sequence creation
return new BasicSequence(sequence.getName(), temp, sequence.getComments(), sequence.getAlphabet());
@@ -181,7 +181,7 @@ Sequence* SequenceTools::reverseTranscript(const Sequence& sequence) throw (Alph
Sequence& SequenceTools::invert(Sequence& seq)
{
size_t seq_size = seq.size(); // store seq size for efficiency
- unsigned int tmp_state = 0; // to store one state when swapping positions
+ int tmp_state = 0; // to store one state when swapping positions
size_t j = seq_size; // symetric position iterator from sequence end
for (size_t i = 0; i < seq_size / 2; i++)
{
@@ -304,6 +304,22 @@ size_t SequenceTools::getNumberOfCompleteSites(const Sequence& seq)
/******************************************************************************/
+Sequence* SequenceTools::getSequenceWithCompleteSites(const Sequence& seq)
+{
+ const Alphabet* alpha = seq.getAlphabet();
+ vector<int> content;
+ for (size_t i = 0; i < seq.size(); i++)
+ {
+ if (!(alpha->isGap(seq[i]) || alpha->isUnresolved(seq[i])))
+ content.push_back(seq[i]);
+ }
+ Sequence* newSeq = dynamic_cast<Sequence*>(seq.clone());
+ newSeq->setContent(content);
+ return newSeq;
+}
+
+/******************************************************************************/
+
size_t SequenceTools::getNumberOfUnresolvedSites(const Sequence& seq)
{
size_t count = 0;
@@ -346,7 +362,7 @@ void SequenceTools::removeGaps(Sequence& seq)
/******************************************************************************/
-Sequence* SequenceTools::getSequenceWithoutStops(const Sequence& seq) throw (Exception)
+Sequence* SequenceTools::getSequenceWithoutStops(const Sequence& seq, const GeneticCode& gCode) throw (Exception)
{
const CodonAlphabet* calpha = dynamic_cast<const CodonAlphabet*>(seq.getAlphabet());
if (!calpha)
@@ -354,7 +370,7 @@ Sequence* SequenceTools::getSequenceWithoutStops(const Sequence& seq) throw (Exc
vector<int> content;
for (size_t i = 0; i < seq.size(); i++)
{
- if (!calpha->isStop(seq[i]))
+ if (!gCode.isStop(seq[i]))
content.push_back(seq[i]);
}
Sequence* newSeq = dynamic_cast<Sequence*>(seq.clone());
@@ -364,21 +380,21 @@ Sequence* SequenceTools::getSequenceWithoutStops(const Sequence& seq) throw (Exc
/******************************************************************************/
-void SequenceTools::removeStops(Sequence& seq) throw (Exception)
+void SequenceTools::removeStops(Sequence& seq, const GeneticCode& gCode) throw (Exception)
{
const CodonAlphabet* calpha = dynamic_cast<const CodonAlphabet*>(seq.getAlphabet());
if (!calpha)
throw Exception("SequenceTools::removeStops. Input sequence should have a codon alphabet.");
for (size_t i = seq.size(); i > 0; --i)
{
- if (calpha->isStop(seq[i - 1]))
+ if (gCode.isStop(seq[i - 1]))
seq.deleteElement(i - 1);
}
}
/******************************************************************************/
-void SequenceTools::replaceStopsWithGaps(Sequence& seq) throw (Exception)
+void SequenceTools::replaceStopsWithGaps(Sequence& seq, const GeneticCode& gCode) throw (Exception)
{
const CodonAlphabet* calpha = dynamic_cast<const CodonAlphabet*>(seq.getAlphabet());
if (!calpha)
@@ -386,7 +402,7 @@ void SequenceTools::replaceStopsWithGaps(Sequence& seq) throw (Exception)
int gap = calpha->getGapCharacterCode();
for (size_t i = 0; i < seq.size(); ++i)
{
- if (calpha->isStop(seq[i]))
+ if (gCode.isStop(seq[i]))
seq.setElement(i, gap);
}
}
@@ -568,7 +584,7 @@ Sequence* SequenceTools::RNYslice(const Sequence& seq, int ph) throw (AlphabetEx
throw Exception("Bad phase for RNYSlice: " + TextTools::toString(ph) + ". Should be between 1 and 3.");
size_t s = seq.size();
- size_t n = (s - ph + 3) / 3;
+ size_t n = (s - static_cast<size_t>(ph) + 3) / 3;
vector<int> content(n);
@@ -577,7 +593,7 @@ Sequence* SequenceTools::RNYslice(const Sequence& seq, int ph) throw (AlphabetEx
for (size_t i = 0; i < n; i++)
{
- j = i * 3 + ph - 1;
+ j = i * 3 + static_cast<size_t>(ph) - 1;
if (j == 0)
content[i] = _RNY.getRNY(tir, seq[0], seq[1], *seq.getAlphabet());
@@ -636,27 +652,27 @@ Sequence* SequenceTools::RNYslice(const Sequence& seq) throw (AlphabetException)
/******************************************************************************/
-void SequenceTools::getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit, bool includeStop)
+void SequenceTools::getCDS(Sequence& sequence, const GeneticCode& gCode, bool checkInit, bool checkStop, bool includeInit, bool includeStop)
{
const CodonAlphabet* alphabet = dynamic_cast<const CodonAlphabet*>(sequence.getAlphabet());
if (!alphabet)
throw AlphabetException("SequenceTools::getCDS. Sequence is not a codon sequence.");
if (checkInit)
{
- unsigned int i;
- for (i = 0; i < sequence.size() && !alphabet->isInit(sequence[i]); ++i)
+ size_t i;
+ for (i = 0; i < sequence.size() && !gCode.isStart(sequence[i]); ++i)
{}
- for (unsigned int j = 0; includeInit ? j < i : j <= i; ++j)
+ for (size_t j = 0; includeInit ? j < i : j <= i; ++j)
{
sequence.deleteElement(j);
}
}
if (checkStop)
{
- unsigned int i;
- for (i = 0; i < sequence.size() && !alphabet->isStop(sequence[i]); ++i)
+ size_t i;
+ for (i = 0; i < sequence.size() && !gCode.isStop(sequence[i]); ++i)
{}
- for (unsigned int j = includeStop ? i + 1 : i; j < sequence.size(); ++j)
+ for (size_t j = includeStop ? i + 1 : i; j < sequence.size(); ++j)
{
sequence.deleteElement(j);
}
@@ -696,4 +712,16 @@ size_t SequenceTools::findFirstOf(const Sequence& seq, const Sequence& motif, bo
}
/******************************************************************************/
+
+Sequence* SequenceTools::getRandomSequence(const Alphabet* alphabet, size_t length)
+{
+ int s = static_cast<int>(alphabet->getSize());
+ vector<int> content(length);
+ for (size_t i = 0; i < length; ++i) {
+ content[i] = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(s);
+ }
+ return new BasicSequence("random", content, alphabet);
+}
+
+/******************************************************************************/
diff --git a/src/Bpp/Seq/SequenceTools.h b/src/Bpp/Seq/SequenceTools.h
index ee85eb4..f1a646f 100644
--- a/src/Bpp/Seq/SequenceTools.h
+++ b/src/Bpp/Seq/SequenceTools.h
@@ -46,6 +46,7 @@
#include "Alphabet/DNA.h"
#include "Alphabet/RNA.h"
#include "Alphabet/RNY.h"
+#include "GeneticCode/GeneticCode.h"
#include "Sequence.h"
#include "SymbolListTools.h"
#include "NucleicAcidsReplication.h"
@@ -241,14 +242,25 @@ public:
static size_t getNumberOfCompleteSites(const Sequence& seq);
/**
+ * @brief Get a copy of the sequence keeping only complete sites (neither gap nor unresolved).
+ *
+ * The original sequence will be cloned to serve as a template.
+ * @param seq The sequence to analyse.
+ */
+
+ static Sequence* getSequenceWithCompleteSites(const Sequence& seq);
+
+ /**
* @return The number of unresolved sites in the sequence.
*
* @param seq The sequence to analyse.
*
* @author Sylvain Gaillard
*/
+
static size_t getNumberOfUnresolvedSites(const Sequence& seq);
+
/**
* @brief Remove gaps from a sequence.
*
@@ -273,9 +285,10 @@ public:
*
* The deleteElement method of the Sequence object will be used where appropriate.
* @param seq The sequence to analyse.
+ * @param gCode The genetic code according to which stop codons are specified.
* @throw Exception if the input sequence does not have a codon alphabet.
*/
- static void removeStops(Sequence& seq) throw (Exception);
+ static void removeStops(Sequence& seq, const GeneticCode& gCode) throw (Exception);
/**
* @brief Get a copy of the codon sequence without stops.
@@ -284,19 +297,21 @@ public:
* The original sequence will be cloned to serve as a template.
*
* @param seq The sequence to analyse.
+ * @param gCode The genetic code according to which stop codons are specified.
* @return A new sequence object without stops.
* @throw Exception if the input sequence does not have a codon alphabet.
*/
- static Sequence* getSequenceWithoutStops(const Sequence& seq) throw (Exception);
+ static Sequence* getSequenceWithoutStops(const Sequence& seq, const GeneticCode& gCode) throw (Exception);
/**
* @brief Replace stop codons by gaps.
*
* The setElement method of the Sequence object will be used where appropriate.
* @param seq The sequence to analyse.
+ * @param gCode The genetic code according to which stop codons are specified.
* @throw Exception if the input sequence does not have a codon alphabet.
*/
- static void replaceStopsWithGaps(Sequence& seq) throw (Exception);
+ static void replaceStopsWithGaps(Sequence& seq, const GeneticCode& gCode) throw (Exception);
/**
* @brief Bowker's test for homogeneity.
@@ -385,12 +400,13 @@ public:
* @brief Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.
*
* @param sequence The sequence to be reduced to CDS part.
+ * @param gCode The genetic code according to which start and stop codons are specified.
* @param checkInit If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false.
* @param checkStop If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false.
* @param includeInit Tell if initiator codon should be kept or removed. No effect if checkInit is false.
* @param includeStop Tell if stop codon should be kept or removed. No effect if checkStop is false.
*/
- static void getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);
+ static void getCDS(Sequence& sequence, const GeneticCode& gCode, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);
/**
* @brief Find the position of a motif in a sequence
@@ -403,6 +419,15 @@ public:
* length.
*/
static size_t findFirstOf(const Sequence& seq, const Sequence& motif, bool strict = true);
+
+ /**
+ * @brief Get a random sequence of given size and alphabet, with all states having equal probability.
+ *
+ * @param alphabet The alphabet to use.
+ * @param length The length of the sequence to generate.
+ * @return A pointer toward a new Sequence object.
+ */
+ static Sequence* getRandomSequence(const Alphabet* alphabet, size_t length);
};
} // end of namespace bpp.
diff --git a/src/Bpp/Seq/SequenceWithAnnotation.cpp b/src/Bpp/Seq/SequenceWithAnnotation.cpp
index d1caeca..1f86f4f 100644
--- a/src/Bpp/Seq/SequenceWithAnnotation.cpp
+++ b/src/Bpp/Seq/SequenceWithAnnotation.cpp
@@ -188,7 +188,7 @@ void SequenceWithAnnotation::setToSizeL(size_t newSize)
//We must truncate sequence from the left.
SymbolListDeletionEvent event(this, 0, seqSize - newSize);
fireBeforeSequenceDeleted(event);
- content_.erase(content_.begin(), content_.begin() + (seqSize - newSize));
+ content_.erase(content_.begin(), content_.begin() + static_cast<ptrdiff_t>(seqSize - newSize));
fireAfterSequenceDeleted(event);
return;
}
diff --git a/src/Bpp/Seq/SequenceWithAnnotationTools.cpp b/src/Bpp/Seq/SequenceWithAnnotationTools.cpp
index 8db345b..346afef 100644
--- a/src/Bpp/Seq/SequenceWithAnnotationTools.cpp
+++ b/src/Bpp/Seq/SequenceWithAnnotationTools.cpp
@@ -57,14 +57,16 @@ void SequenceMask::afterSequenceChanged(const SymbolListEditionEvent& event)
void SequenceMask::afterSequenceInserted(const SymbolListInsertionEvent& event)
{
- mask_.insert(mask_.begin() + event.getPosition(), event.getLength(), false);
+ mask_.insert(mask_.begin() + static_cast<ptrdiff_t>(event.getPosition()),
+ event.getLength(), false);
}
/******************************************************************************/
void SequenceMask::afterSequenceDeleted(const SymbolListDeletionEvent& event)
{
- mask_.erase(mask_.begin() + event.getPosition(), mask_.begin() + event.getPosition() + event.getLength());
+ mask_.erase(mask_.begin() + static_cast<ptrdiff_t>(event.getPosition()),
+ mask_.begin() + static_cast<ptrdiff_t>(event.getPosition() + event.getLength()));
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/SequenceWithAnnotationTools.h b/src/Bpp/Seq/SequenceWithAnnotationTools.h
index c482f07..8b1e35d 100644
--- a/src/Bpp/Seq/SequenceWithAnnotationTools.h
+++ b/src/Bpp/Seq/SequenceWithAnnotationTools.h
@@ -44,6 +44,8 @@ knowledge of the CeCILL license and that you accept its terms.
#include "SequenceWithAnnotation.h"
#include <Bpp/Numeric/VectorTools.h>
+#include <cstddef>
+
namespace bpp {
class SequenceMask :
@@ -161,7 +163,7 @@ namespace bpp {
void setMask(size_t pos, const std::vector<bool>& mask) {
if (pos + mask.size() > mask_.size())
throw Exception("SequenceMask::setMask. Vector overflow. Scores number: " + TextTools::toString(mask_.size()) + ", but trying to insert " + TextTools::toString(mask.size()) + " scores at position " + TextTools::toString(pos) + ".");
- std::copy(mask.begin(), mask.end(), mask_.begin() + pos);
+ std::copy(mask.begin(), mask.end(), mask_.begin() + static_cast<ptrdiff_t>(pos));
}
bool merge(const SequenceAnnotation& anno) {
@@ -175,7 +177,7 @@ namespace bpp {
}
SequenceAnnotation* getPartAnnotation(size_t pos, size_t len) const throw (Exception) {
- return new SequenceMask(std::vector<bool>(mask_.begin() + pos, mask_.begin() + pos + len), removable_);
+ return new SequenceMask(std::vector<bool>(mask_.begin() + static_cast<ptrdiff_t>(pos), mask_.begin() + static_cast<ptrdiff_t>(pos + len)), removable_);
}
};
diff --git a/src/Bpp/Seq/SequenceWithQuality.cpp b/src/Bpp/Seq/SequenceWithQuality.cpp
index 8613310..40cc8f4 100644
--- a/src/Bpp/Seq/SequenceWithQuality.cpp
+++ b/src/Bpp/Seq/SequenceWithQuality.cpp
@@ -62,14 +62,18 @@ void SequenceQuality::afterSequenceChanged(const SymbolListEditionEvent& event)
void SequenceQuality::afterSequenceInserted(const SymbolListInsertionEvent& event)
{
- qualScores_.insert(qualScores_.begin() + event.getPosition(), event.getLength(), DEFAULT_QUALITY_VALUE);
+ qualScores_.insert(
+ qualScores_.begin() + static_cast<ptrdiff_t>(event.getPosition()),
+ event.getLength(), DEFAULT_QUALITY_VALUE);
}
/******************************************************************************/
void SequenceQuality::afterSequenceDeleted(const SymbolListDeletionEvent& event)
{
- qualScores_.erase(qualScores_.begin() + event.getPosition(), qualScores_.begin() + event.getPosition() + event.getLength());
+ qualScores_.erase(
+ qualScores_.begin() + static_cast<ptrdiff_t>(event.getPosition()),
+ qualScores_.begin() + static_cast<ptrdiff_t>(event.getPosition() + event.getLength()));
}
/******************************************************************************/
diff --git a/src/Bpp/Seq/SequenceWithQuality.h b/src/Bpp/Seq/SequenceWithQuality.h
index f0aebe4..7b26aab 100644
--- a/src/Bpp/Seq/SequenceWithQuality.h
+++ b/src/Bpp/Seq/SequenceWithQuality.h
@@ -50,6 +50,7 @@ knowledge of the CeCILL license and that you accept its terms.
#include <string>
#include <vector>
+#include <cstddef>
namespace bpp {
/**
@@ -182,7 +183,7 @@ namespace bpp {
void setScores(size_t pos, const std::vector<int>& scores) {
if (pos + scores.size() > qualScores_.size())
throw Exception("SequenceQuality::setScores. Vector overflow. Scores number: " + TextTools::toString(qualScores_.size()) + ", but trying to insert " + TextTools::toString(scores.size()) + " scores at position " + TextTools::toString(pos) + ".");
- std::copy(scores.begin(), scores.end(), qualScores_.begin() + pos);
+ std::copy(scores.begin(), scores.end(), qualScores_.begin() + static_cast<ptrdiff_t>(pos));
}
bool merge(const SequenceAnnotation& anno) {
@@ -196,7 +197,11 @@ namespace bpp {
}
SequenceQuality* getPartAnnotation(size_t pos, size_t len) const throw (Exception) {
- return new SequenceQuality(std::vector<int>(qualScores_.begin() + pos, qualScores_.begin() + pos + len), removable_);
+ return new SequenceQuality(
+ std::vector<int>(
+ qualScores_.begin() + static_cast<ptrdiff_t>(pos),
+ qualScores_.begin() + static_cast<ptrdiff_t>(pos + len)),
+ removable_);
}
};
diff --git a/src/Bpp/Seq/SiteTools.cpp b/src/Bpp/Seq/SiteTools.cpp
index f731b41..1431648 100644
--- a/src/Bpp/Seq/SiteTools.cpp
+++ b/src/Bpp/Seq/SiteTools.cpp
@@ -105,22 +105,6 @@ bool SiteTools::hasUnknown(const Site& site)
/******************************************************************************/
-bool SiteTools::hasStopCodon(const Site& site)
-{
- // Main loop : for all characters in site
- const CodonAlphabet* pca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
- if (pca == 0)
- return false;
- for (size_t i = 0; i < site.size(); i++)
- {
- if (pca->isStop(site[i]))
- return true;
- }
- return false;
-}
-
-/******************************************************************************/
-
bool SiteTools::isComplete(const Site& site)
{
// Main loop : for all characters in site
@@ -356,7 +340,7 @@ size_t SiteTools::getNumberOfDistinctCharacters(const Site& site) throw (EmptySi
return 1;
map<int, size_t> counts;
SymbolListTools::getCounts(site, counts);
- int s = 0;
+ size_t s = 0;
for (map<int, size_t>::iterator it = counts.begin(); it != counts.end(); it++)
{
if (it->second != 0)
diff --git a/src/Bpp/Seq/SiteTools.h b/src/Bpp/Seq/SiteTools.h
index b8fac4f..f0f3ff1 100644
--- a/src/Bpp/Seq/SiteTools.h
+++ b/src/Bpp/Seq/SiteTools.h
@@ -88,11 +88,6 @@ public:
/**
* @param site A site.
- * @return True if the site contains a Stop Codon, when the alphabet is a CodonAlphabet.
- */
- static bool hasStopCodon(const Site& site);
- /**
- * @param site A site.
* @return True if the site contains no gap and no unknown characters.
*/
static bool isComplete(const Site& site);
diff --git a/src/Bpp/Seq/StringSequenceTools.cpp b/src/Bpp/Seq/StringSequenceTools.cpp
index 36cbf47..fb29e48 100644
--- a/src/Bpp/Seq/StringSequenceTools.cpp
+++ b/src/Bpp/Seq/StringSequenceTools.cpp
@@ -58,7 +58,7 @@ using namespace std;
/****************************************************************************************/
-string StringSequenceTools::subseq(const string& sequence, int begin, int end) throw (Exception)
+string StringSequenceTools::subseq(const string& sequence, size_t begin, size_t end) throw (Exception)
{
// Checking interval
if (end < begin)
@@ -68,8 +68,8 @@ string StringSequenceTools::subseq(const string& sequence, int begin, int end) t
string temp(sequence);
// Truncate sequence
- temp.erase(temp.begin() + end + 1, temp.end());
- temp.erase(temp.begin(), temp.begin() + begin);
+ temp.erase(temp.begin() + static_cast<ptrdiff_t>(end + 1), temp.end());
+ temp.erase(temp.begin(), temp.begin() + static_cast<ptrdiff_t>(begin));
// Send result
return temp;
@@ -77,12 +77,12 @@ string StringSequenceTools::subseq(const string& sequence, int begin, int end) t
/****************************************************************************************/
-string StringSequenceTools::setToSizeR(const string& sequence, int size)
+string StringSequenceTools::setToSizeR(const string& sequence, size_t size)
{
return TextTools::resizeRight(sequence, size, '-');
}
-string StringSequenceTools::setToSizeL(const string& sequence, int size)
+string StringSequenceTools::setToSizeL(const string& sequence, size_t size)
{
return TextTools::resizeLeft(sequence, size, '-');
}
@@ -273,9 +273,9 @@ vector<int> StringSequenceTools::codeSequence(const string& sequence, const Alph
throw (BadCharException)
{
unsigned int size = AlphabetTools::getAlphabetCodingSize(alphabet); // Warning, an exception may be casted here!
- vector<int> code((int)floor((double)sequence.size() / (double)size));
- unsigned int pos = 0;
- unsigned int count = 0;
+ vector<int> code(static_cast<size_t>(floor(static_cast<double>(sequence.size()) / static_cast<double>(size))));
+ size_t pos = 0;
+ size_t count = 0;
while (pos + size <= sequence.size())
{
code[count] = alphabet->charToInt(sequence.substr(pos, size));
diff --git a/src/Bpp/Seq/StringSequenceTools.h b/src/Bpp/Seq/StringSequenceTools.h
index 45ee54c..224a5fb 100644
--- a/src/Bpp/Seq/StringSequenceTools.h
+++ b/src/Bpp/Seq/StringSequenceTools.h
@@ -80,7 +80,7 @@ class StringSequenceTools
* @return A string with the subsequence.
* @throw Exception If position does not match the interval [0, length].
*/
- static std::string subseq(const std::string& sequence, int begin, int end) throw (Exception);
+ static std::string subseq(const std::string& sequence, size_t begin, size_t end) throw (Exception);
/**
* @brief Set up the size of a sequence from the right side.
@@ -91,7 +91,7 @@ class StringSequenceTools
* @param sequence The input sequence.
* @param size The new size of the sequence.
*/
- static std::string setToSizeR(const std::string& sequence, int size);
+ static std::string setToSizeR(const std::string& sequence, size_t size);
/**
* @brief Set up the size of a sequence from the left side.
@@ -102,7 +102,7 @@ class StringSequenceTools
* @param sequence The input sequence.
* @param size The new size of the sequence.
*/
- static std::string setToSizeL(const std::string& sequence, int size);
+ static std::string setToSizeL(const std::string& sequence, size_t size);
/**
* @brief Delete all occurrences of a character in the sequence.
diff --git a/src/Bpp/Seq/SymbolList.cpp b/src/Bpp/Seq/SymbolList.cpp
index fcf595f..b7673f2 100644
--- a/src/Bpp/Seq/SymbolList.cpp
+++ b/src/Bpp/Seq/SymbolList.cpp
@@ -128,7 +128,7 @@ void BasicSymbolList::addElement(const string& c) throw (BadCharException)
void BasicSymbolList::addElement(size_t pos, const string& c) throw (BadCharException, IndexOutOfBoundsException)
{
if(pos >= content_.size()) throw IndexOutOfBoundsException("BasicSymbolList::addElement. Invalid position.", pos, 0, size() - 1);
- content_.insert(content_.begin() + pos, alphabet_->charToInt(c));
+ content_.insert(content_.begin() + static_cast<ptrdiff_t>(pos), alphabet_->charToInt(c));
}
/****************************************************************************************/
@@ -164,7 +164,7 @@ void BasicSymbolList::deleteElement(size_t pos) throw (IndexOutOfBoundsException
{
if(pos >= content_.size())
throw IndexOutOfBoundsException("BasicSymbolList::deleteElement. Invalid position.", pos, 0, size() - 1);
- content_.erase(content_.begin() + pos);
+ content_.erase(content_.begin() + static_cast<ptrdiff_t>(pos));
}
/****************************************************************************************/
@@ -173,7 +173,7 @@ void BasicSymbolList::deleteElements(size_t pos, size_t len) throw (IndexOutOfBo
{
if (pos + len > content_.size())
throw IndexOutOfBoundsException("BasicSymbolList::deleteElements. Invalid position.", pos + len, 0, size() - 1);
- content_.erase(content_.begin() + pos, content_.begin() + pos + len);
+ content_.erase(content_.begin() + static_cast<ptrdiff_t>(pos), content_.begin() + static_cast<ptrdiff_t>(pos + len));
}
/****************************************************************************************/
@@ -193,7 +193,7 @@ void BasicSymbolList::addElement(size_t pos, int v) throw (BadIntException, Inde
if(pos >= content_.size())
throw IndexOutOfBoundsException("BasicSymbolList::addElement. Invalid position.", pos, 0, size() - 1);
alphabet_->intToChar(v);
- content_.insert(content_.begin() + pos, v);
+ content_.insert(content_.begin() + static_cast<ptrdiff_t>(pos), v);
}
/****************************************************************************************/
@@ -333,7 +333,7 @@ void EdSymbolList::addElement(size_t pos, const string& c) throw (BadCharExcepti
if (pos >= content_.size()) throw IndexOutOfBoundsException("EdSymbolList::addElement. Invalid position.", pos, 0, size() - 1);
SymbolListInsertionEvent event(this, pos, 1);
fireBeforeSequenceInserted(event);
- content_.insert(content_.begin() + pos, alphabet_->charToInt(c));
+ content_.insert(content_.begin() + static_cast<ptrdiff_t>(pos), alphabet_->charToInt(c));
fireAfterSequenceInserted(event);
}
@@ -372,7 +372,7 @@ void EdSymbolList::deleteElement(size_t pos) throw (IndexOutOfBoundsException)
throw IndexOutOfBoundsException("EdSymbolList::deleteElement. Invalid position.", pos, 0, size() - 1);
SymbolListDeletionEvent event(this, pos, 1);
fireBeforeSequenceDeleted(event);
- content_.erase(content_.begin() + pos);
+ content_.erase(content_.begin() + static_cast<ptrdiff_t>(pos));
fireAfterSequenceDeleted(event);
}
@@ -384,7 +384,7 @@ void EdSymbolList::deleteElements(size_t pos, size_t len) throw (IndexOutOfBound
throw IndexOutOfBoundsException("EdSymbolList::deleteElements. Invalid position.", pos + len, 0, size() - 1);
SymbolListDeletionEvent event(this, pos, len);
fireBeforeSequenceDeleted(event);
- content_.erase(content_.begin() + pos, content_.begin() + pos + len);
+ content_.erase(content_.begin() + static_cast<ptrdiff_t>(pos), content_.begin() + static_cast<ptrdiff_t>(pos + len));
fireAfterSequenceDeleted(event);
}
@@ -411,7 +411,7 @@ void EdSymbolList::addElement(size_t pos, int v) throw (BadIntException, IndexOu
SymbolListInsertionEvent event(this, pos, 1);
fireBeforeSequenceInserted(event);
alphabet_->intToChar(v);
- content_.insert(content_.begin() + pos, v);
+ content_.insert(content_.begin() + static_cast<ptrdiff_t>(pos), v);
fireAfterSequenceInserted(event);
}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2478551..be64828 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -25,6 +25,7 @@ SET(CPP_FILES
Bpp/Seq/DistanceMatrix.cpp
Bpp/Seq/CodonSiteTools.cpp
Bpp/Seq/Alphabet/AbstractAlphabet.cpp
+ Bpp/Seq/Alphabet/LetterAlphabet.cpp
Bpp/Seq/Alphabet/AlphabetExceptions.cpp
Bpp/Seq/Alphabet/AlphabetTools.cpp
Bpp/Seq/Alphabet/CodonAlphabet.cpp
@@ -33,20 +34,19 @@ SET(CPP_FILES
Bpp/Seq/Alphabet/DNA.cpp
Bpp/Seq/Alphabet/ProteicAlphabet.cpp
Bpp/Seq/Alphabet/RNA.cpp
- Bpp/Seq/Alphabet/StandardCodonAlphabet.cpp
- Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.cpp
- Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.cpp
- Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.cpp
- Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.cpp
Bpp/Seq/Alphabet/WordAlphabet.cpp
Bpp/Seq/Alphabet/RNY.cpp
Bpp/Seq/Alphabet/BinaryAlphabet.cpp
+ Bpp/Seq/Alphabet/IntegerAlphabet.cpp
+ Bpp/Seq/Alphabet/NumericAlphabet.cpp
Bpp/Seq/GeneticCode/GeneticCode.cpp
Bpp/Seq/GeneticCode/StandardGeneticCode.cpp
Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.cpp
Bpp/Seq/GeneticCode/InvertebrateMitochondrialGeneticCode.cpp
Bpp/Seq/GeneticCode/EchinodermMitochondrialGeneticCode.cpp
Bpp/Seq/GeneticCode/YeastMitochondrialGeneticCode.cpp
+ Bpp/Seq/GeneticCode/AscidianMitochondrialGeneticCode.cpp
+ Bpp/Seq/GeneticCode/MoldMitochondrialGeneticCode.cpp
Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.cpp
Bpp/Seq/AlphabetIndex/MiyataAAChemicalDistance.cpp
Bpp/Seq/AlphabetIndex/BLOSUM50.cpp
@@ -130,14 +130,12 @@ SET(H_FILES
Bpp/Seq/Alphabet/ProteicAlphabet.h
Bpp/Seq/Alphabet/ProteicAlphabetState.h
Bpp/Seq/Alphabet/RNA.h
- Bpp/Seq/Alphabet/StandardCodonAlphabet.h
- Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h
- Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h
- Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h
- Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h
Bpp/Seq/Alphabet/WordAlphabet.h
Bpp/Seq/Alphabet/RNY.h
Bpp/Seq/Alphabet/BinaryAlphabet.h
+ Bpp/Seq/Alphabet/IntegerAlphabet.h
+ Bpp/Seq/Alphabet/AlphabetNumericState.h
+ Bpp/Seq/Alphabet/NumericAlphabet.h
Bpp/Seq/GeneticCode/GeneticCode.h
Bpp/Seq/GeneticCode/StandardGeneticCode.h
Bpp/Seq/GeneticCode/VertebrateMitochondrialGeneticCode.h
@@ -192,6 +190,7 @@ SET(H_FILES
Bpp/Seq/Container/SiteContainerIterator.h
Bpp/Seq/AlphabetIndex/AlphabetIndex1.h
Bpp/Seq/AlphabetIndex/AlphabetIndex2.h
+ Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h
Bpp/Seq/AlphabetIndex/GranthamAAChemicalDistance.h
Bpp/Seq/AlphabetIndex/GranthamAAPolarityIndex.h
Bpp/Seq/AlphabetIndex/GranthamAAVolumeIndex.h
diff --git a/test/test_alphabets.cpp b/test/test_alphabets.cpp
index 9fc128e..bf5b346 100644
--- a/test/test_alphabets.cpp
+++ b/test/test_alphabets.cpp
@@ -41,11 +41,7 @@ knowledge of the CeCILL license and that you accept its terms.
#include <Bpp/Seq/Alphabet/RNA.h>
#include <Bpp/Seq/Alphabet/ProteicAlphabet.h>
#include <Bpp/Seq/Alphabet/DefaultAlphabet.h>
-#include <Bpp/Seq/Alphabet/StandardCodonAlphabet.h>
-#include <Bpp/Seq/Alphabet/VertebrateMitochondrialCodonAlphabet.h>
-#include <Bpp/Seq/Alphabet/InvertebrateMitochondrialCodonAlphabet.h>
-#include <Bpp/Seq/Alphabet/EchinodermMitochondrialCodonAlphabet.h>
-#include <Bpp/Seq/Alphabet/YeastMitochondrialCodonAlphabet.h>
+#include <Bpp/Seq/Alphabet/CodonAlphabet.h>
#include <iostream>
using namespace bpp;
@@ -57,21 +53,13 @@ int main() {
NucleicAlphabet* rna = new RNA();
Alphabet* pro = new ProteicAlphabet;
Alphabet* def = new DefaultAlphabet;
- Alphabet* stdCdn = new StandardCodonAlphabet(rna);
- Alphabet* vmtCdn = new VertebrateMitochondrialCodonAlphabet(rna);
- Alphabet* imtCdn = new InvertebrateMitochondrialCodonAlphabet(rna);
- Alphabet* emtCdn = new EchinodermMitochondrialCodonAlphabet(rna);
- Alphabet* ymtCdn = new YeastMitochondrialCodonAlphabet(rna);
+ Alphabet* cdn = new CodonAlphabet(rna);
delete dna;
delete rna;
delete pro;
delete def;
- delete stdCdn;
- delete vmtCdn;
- delete imtCdn;
- delete emtCdn;
- delete ymtCdn;
+ delete cdn;
return (0);
}
diff --git a/test/test_bowker.cpp b/test/test_bowker.cpp
index 8b4e24b..f71ef32 100644
--- a/test/test_bowker.cpp
+++ b/test/test_bowker.cpp
@@ -48,7 +48,7 @@ using namespace std;
BasicSequence* getRandomSequence(const Alphabet* alphabet, unsigned int size) {
string seq = "";
for (unsigned int i = 0; i < size; ++i)
- seq += alphabet->intToChar(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(alphabet->getSize()));
+ seq += alphabet->intToChar(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(alphabet->getSize())));
return new BasicSequence("random seq", seq, alphabet);
}
diff --git a/test/test_walker.cpp b/test/test_walker.cpp
index 1757f2a..06fe0d4 100644
--- a/test/test_walker.cpp
+++ b/test/test_walker.cpp
@@ -54,7 +54,7 @@ bool testSeq(SequenceWalker& walker, unsigned int pos, unsigned int truth) {
cout << walker.getSequencePosition(46) << endl;;
for (unsigned int i = 0; i < 1000; ++i) {
ApplicationTools::displayGauge(i, 999, '=');
- size_t r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(47);
+ size_t r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(47);
size_t x = walker.getSequencePosition(r);
if (walker.getSequencePosition(pos) != truth) {
cout << endl;
@@ -74,7 +74,7 @@ bool testAln(SequenceWalker& walker, unsigned int pos, unsigned int truth) {
cout << walker.getAlignmentPosition(26) << endl;
for (unsigned int i = 0; i < 1000; ++i) {
ApplicationTools::displayGauge(i, 999, '=');
- unsigned int r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(27);
+ size_t r = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(27);
walker.getAlignmentPosition(r);
if (walker.getAlignmentPosition(pos) != truth) {
cout << endl;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbpp-seq.git
More information about the debian-med-commit
mailing list