[med-svn] [bppsuite] 03/10: Import Upstream version 0.7.0
Andreas Tille
tille at debian.org
Wed Jun 14 11:36:59 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository bppsuite.
commit 5b3e4806f1360c7d99d783774d2f38b232f9a969
Author: Andreas Tille <tille at debian.org>
Date: Wed Jun 14 13:23:22 2017 +0200
Import Upstream version 0.7.0
---
CMakeLists.txt | 11 +-
ChangeLog | 9 +
Examples/AlignmentScoring/AlnScores.bpp | 13 +
Examples/Data/HIV1_REF_2010_gag_DNA.fasta | 1638 ++++++++++++++++++++
Examples/Data/HIV1_REF_2010_gag_macse_AA.fasta | 78 +
Examples/Data/HIV1_REF_2010_gag_macse_DNA.fasta | 78 +
.../MaximumLikelihood/Codons/BranchModel/ML.bpp | 2 +-
.../Codons/{BranchModel => M0}/ML.bpp | 14 +-
.../Codons/{BranchModel => M1}/ML.bpp | 20 +-
.../Codons/{BranchModel => M2}/ML.bpp | 20 +-
.../Nucleotides/Homogeneous/ML.bpp | 6 +-
.../Nucleotides/NonHomogeneousGG/MLNHGG.bpp | 8 +-
.../Nucleotides/NonHomogeneousGeneral/MLNH.bpp | 31 +-
.../MaximumLikelihood/Proteins/Homogeneous/ML.bpp | 6 +-
Examples/README | 5 +
Examples/SequenceManipulation/SeqMan2.bpp | 33 +
Examples/SequenceSimulation/SeqGen.bpp | 2 +-
bppSuite.spec | 7 +-
bppSuite/CMakeLists.txt | 5 +
bppSuite/bppAlnScore.cpp | 226 +++
bppSuite/bppAncestor.cpp | 268 ++--
bppSuite/bppML.cpp | 280 ++--
bppSuite/bppSeqMan.cpp | 100 +-
debian/changelog | 7 +
debian/control | 6 +-
doc/bppsuite.texi | 700 ++++++---
doc/version.texi | 2 -
man/CMakeLists.txt | 1 +
man/bppalnscore.1.txt | 39 +
29 files changed, 3048 insertions(+), 567 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a638765..b96e901 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -164,9 +164,11 @@ IF(NROFF_EXE)
COMMAND gzip -f bppphysamp.1
COMMAND cp bpptreedraw.1.txt bpptreedraw.1
COMMAND gzip -f bpptreedraw.1
+ COMMAND cp bppalnscore.1.txt bppalnscore.1
+ COMMAND gzip -f bppalnscore.1
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/man
)
- SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "man/bppml.1.gz;man/bppseqgen.1.gz;man/bppdist.1.gz;man/bpppars.1.gz;man/bppseqman.1.gz;man/bppconsense.1.gz;man/bppancestor.1.gz;man/bppreroot.1.gz;man/bppphysamp.1.gz;man/bpptreedraw.1.gz")
+ SET_DIRECTORY_PROPERTIES(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "man/bppml.1.gz;man/bppseqgen.1.gz;man/bppdist.1.gz;man/bpppars.1.gz;man/bppseqman.1.gz;man/bppconsense.1.gz;man/bppancestor.1.gz;man/bppreroot.1.gz;man/bppphysamp.1.gz;man/bpptreedraw.1.gz;man/bppalnscore.1.gz")
ENDIF(NROFF_EXE)
# Subdirectories
@@ -179,10 +181,10 @@ ENDIF(NO_DEP_CHECK)
# Packager
SET(CPACK_PACKAGE_NAME "bppsuite")
SET(CPACK_PACKAGE_VENDOR "Bio++ Development Team")
-SET(CPACK_PACKAGE_VERSION "0.6.2")
+SET(CPACK_PACKAGE_VERSION "0.7.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "0")
-SET(CPACK_PACKAGE_VERSION_MINOR "6")
-SET(CPACK_PACKAGE_VERSION_PATCH "2")
+SET(CPACK_PACKAGE_VERSION_MINOR "7")
+SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "The Bio++ Program Suite")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING.txt")
SET(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS.txt")
@@ -215,6 +217,7 @@ SET(CPACK_SOURCE_IGNORE_FILES
"bppSuite/bpptreedraw"
"bppSuite/bppdist"
"bppSuite/bppseqgen"
+ "bppSuite/bppalnscore"
"doc/bppsuite/"
"doc/bppsuite\\\\.info"
"doc/bppsuite\\\\.toc"
diff --git a/ChangeLog b/ChangeLog
index 3aa35c2..eeb7b49 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+08/02/12 -*- Version 0.7.0 -*-
+
+21/12/11 Julien Dutheil
+* New bppAlnScore program
+* Improved bppSeqMan: protein translation now works + more stop codons removal options.
+
+17/06/11 Julien Dutheil
+* Improved bppancestor, also allows to compute frequencies for leaves.
+
11/06/11 -*- Version 0.6.2 -*-
* Small bug fixed + new packages.
diff --git a/Examples/AlignmentScoring/AlnScores.bpp b/Examples/AlignmentScoring/AlnScores.bpp
new file mode 100644
index 0000000..1d83add
--- /dev/null
+++ b/Examples/AlignmentScoring/AlnScores.bpp
@@ -0,0 +1,13 @@
+DATA=HIV1_REF_2010_gag
+
+input.sequence.file.test = ../Data/$(DATA)_DNA.fasta
+input.sequence.format.test = Fasta
+input.sequence.file.ref = ../Data/$(DATA)_macse_DNA.fasta
+input.sequence.format.ref = Fasta
+
+score.word_size = 3
+score.phase = ATG
+
+output.scores = $(DATA).scores.txt
+output.mase = $(DATA).filter.mase
+output.sps_thresholds = 0.8
diff --git a/Examples/Data/HIV1_REF_2010_gag_DNA.fasta b/Examples/Data/HIV1_REF_2010_gag_DNA.fasta
new file mode 100644
index 0000000..f8ea4e9
--- /dev/null
+++ b/Examples/Data/HIV1_REF_2010_gag_DNA.fasta
@@ -0,0 +1,1638 @@
+>Ref.A1.AU.03.PS1044_Day0.DQ676872
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAGATTAGATGCATGGGA
+GAAAATTCGGCTAAGGCCAGGGGGAAAGAAAAAATATAGACTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTGGAGAGATTCGCACTTAAYCCTRGCCTT
+TTAGAATCAGCAGAAGGATGTCAACAAATAATGGAACAGTTACAACCAGC
+TCTYAAGACAGGAWCAGAAGAAATTAAATCATTATTTAATACAGTAGCAA
+CCCTCTATTGTGTACATCAAAGGATAGATGTAAAAGACACCAAGGAAGCT
+YTAGATAAAATAGAGGAAATAAAA------------AAT-----------
+----------AAG---------------AGCAAG------CAAAGG----
+-----------------------------ACTCAACAGGCAGCAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------AACAGCGGCAAG---------
+---------GTCAGCCAAAATTACCCTATAGTGCAAAATGCACAGGGGCA
+AATGATACAYCAAAACTTGTCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAATAGAA---GAAAAGGCTTTTAGTCCAGAAGTGATACCCATGTTCTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATGTAATGCTGAACAT
+AGTGGGGGGACACCAGGCAGCTATGCAAATGTTAAAAGACACCATCAATG
+AAGAAGCTGCAGAATGGGACAGGTTACATCCAGTACATGCAGGGCCTATT
+CCACCAGGCCAGATAAGAGAACCAAGGGGAAGTGACATAGCAGGAGCTAC
+TAGTACCCCTCAAGAACAATTACAATGGATGACA---------GGCAACC
+CACCTATC---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTACTAGCATTTTGGATATAAG
+ACAAGGGCCAAAAGAATCCTTCAGAGACTATGTAGATAGGTTCTTTAAAG
+CTCTTAGAGCTGAGCAAGCTACACAGGAGGTAAAAAGTTGGATGACAGAG
+ACATTACTGGTC---CAAAATGCAAATCCAGATTGTAAGTCCATTCTAAA
+AGCATTAGGATCAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGG
+GAGTGGGAGGACCCAGCCATAAGGCAAGGGTTTTGGCTGAAGCAATG---
+---------AGTCAAGCACAA------------------------CAAAC
+AAGCATAATGATGCAGAGA---GGCAAC---TTTAGGGGCGGCCAGAAA-
+--AGG---ATTAAGTGTTTTAACTGTGGCAAAGAAGGACACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGAAGGAGGG
+ACACCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAGCAAG---------GGG---
+AGGCCAGGAAATTTCCCTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCG---GAGCTCTTT
+GGGATGGGG---------------GAAGAGATA---ACCTCCCCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAGGACAAG---------------------GAACAGGTC---
+---CCACCC---TTA---GTTTCCCTCAAATCACTCTTTGGCAACGACCC
+ATCGTCACAGTAA
+>Ref.A1.RW.92.92RW008.AB253421
+ATGGGTGCGAGAGCGTCAGTATTAAGTGGGGGAAAATTAGATGCATGGGA
+AAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGAATGAAACATC
+TAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTGGCCTT
+TTAGAAACAACAGAAGGATGTCAAAAAATAATAGAACAGTTACAACCATC
+TGTCAAGACAGGAACAGAAGAACTTAAATCATTATTTAATACAGTAGCAA
+CCCTCTATTGCGTACATCAACGGATAGATGTAAAAGACACCAAGGAAGCC
+CTAGATAAAATAGAGGAAATGCAA------------AAT-----------
+----------AAG---------------AGCAAG------CAAAAG----
+-----------------------------ACACAACAGGCAGCAGCT---
+------------------------------GACATAGGA-----------
+--------------------------------------------------
+-----------------------------AATAGCAGCAAG---------
+---------GTCAGCCAAAATTACCCTATAGTGCAAAATGCACAAGGGCA
+AATGATATATCAGTCCATGTCACCTAGGACTTTGAATGCATGGGTGAAAG
+TAATAGAA---GAAAAGGGTTTCAACCCAGAAGTAATACCCATGTTCTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATATGATGCTAAACAT
+AGTGGGGGGACATCAGGCAGCTATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGACTGGGACAGGTTACATCCAGTACAGGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCCTCAAGAACAAATAGGATGGATGACA---------AGCAACC
+CACCTATC---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTTAGCATTTTGGATGTAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGATTATGTAGATAGGTTCTTTAAAA
+TTCTCAGAGCTGAACAAGCTACACAGGATGTAAAACATTGGATGACAGAA
+ACATTGCTGATC---CAAAATGCAAATCCAGATTGTAAGTCCATTTTAAG
+AGCATTAGGAACAGGGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCAGCCATAAAGCAAGGGTTTTAGCTGAGGCAATG---
+---------AGTCAAGTACAA------------------------CATCC
+AAACATAATGATGCAGAGA---GGCAAT---TTTAGGGGC---CAGAAA-
+--AGG---ATTAAGTGCTTCAACTGTGGCAAAGAAGGACACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAGGG
+ACACCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAGCAAG---------GGG---
+AGGCCAGGAAATTTTCCTCAGAGC--------------------------
+----AGACCG------GAGCCATCAGCC----------------------
+-----------------------------CCACCAGCA---GAGATCTTT
+GGGATAGGG---------------GAAGAGATA---GCCTCCCCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACAGG---------------------GAACCGGCC---
+---CAACCT---TTA---GTTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTCACAGTAA
+>Ref.A1.UG.92.92UG037.AB253429
+ATGGGTGCGAGAGCGTCAGTATTAAGTGGGGGAAAATTAGATGCATGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTAGCCTT
+TTAGAAACAACAGAAGGATGTCAACAAATAATGGAACAATTACAATCAGC
+TCTCAGAACAGGAACAGAAGAACTTAGATCATTATATAATACAGTAGCAA
+CCCTCTATTGCGTACATCAACGGATAGAGGTAAAAGACACCAAGGAAGCT
+CTAGATAAAATAGAGGAGATACAA------------AAG-----------
+----------AAA---------------AGCAAG------CAAAAG----
+-----------------------------ACACAGCAGGCAGCAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------AGTAGCAGCAAG---------
+---------GTCAGCCAAAATTACCCTATAGTGCAAAATGCACAAGGGCA
+AATGATCCACCAGTCCTTGTCACCTAGGACTTTGAATGCATGGGTGAAAG
+TAATAGAA---GAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTCTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTGAATATGATGCTGAACAT
+AGTGGGGGGACACCAGGCAGCTATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGCTACATCCAGTACATGCAGGGCCTGTT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCCTCAAGAACAAATAGCATGGATGACA---------GGCAACC
+CACCTATC---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTTAGCATTTTAGATATAAA
+ACAAGGGCCAAAAGAACCCTTCAGAGACTATGTAGATAGGTTTTTTAAAA
+CTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAAAGGTTGGATGACAGAA
+ACATTACTGATC---CAAAATGCAAATCCAGATTGTAAATCCATCCTAAG
+AGCATTAGGAGCAGGGGCTACATTAGAAGAAATGATGACAGCATGCCAGG
+GAGTGGGAGGACCCGGCCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGTCAAGTACAA------------------------CATAC
+AAACATAATGATGCAGAGA---GGCAAT---TTTAAGGGC---CAGAAA-
+--AGG---ATTAAGTGTTTCAACTGTGGCAAAGAAGGACATCTAGCCAAA
+AATTGCAGGGCTCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGG
+GCACCAAATGAAGGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATCTGG---CCT---TCCAGCAAA---------GGG---
+AGGCCAGGAAATTTTCCTCAGAGC--------------------------
+----AGACCA------GAACCAACAGCC----------------------
+-----------------------------CCACCAGCAGCAGAGATCTTT
+GGGATGAGG---------------GAAGAGATA---GTCTCCCCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AACGACAGG---------------------GACCAGAAC---
+---CCACCT---TCA---GTTTCCCTCAAATCACTCTTTGGCAACGACCT
+CTTGTCACAGTAA
+>Ref.A2.CD.97.97CDKTB48.AF286238
+ATGGGTGCGAGAGCGTCAGTATTGAGCGGCGGAAAATTAGAAGCTTGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAACATT
+TAGTATGGGCAAGCAGGGAGCTGGAAAAATTCTCAATCAACCCCAGCCTT
+TTAGAAACAGAAACAGGATGTAGACGAATATTTGGGCAATTACAACCAGC
+TCTCGAGACAGGAACAGAAGAACTTAGATCATTATATAATACAATAGCAG
+TCCTCTACTTTGTTCATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCT
+CTAGATAAAATAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------TGCAAG------CAGAAG----
+-----------------------------ACACAGCAGGCAGCAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------AGCAGCAGCAGTCAAAATTAC
+AGAGGTAGCAGCAGTCAAAATTACCCTATAGTGCAAAATGCACAAGGGCA
+AATGGTACACCAGGCCGTGTCACCTAGGACTTTGAATGCATGGGTCAAAG
+TAGTTGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTACA
+GCATTATCAGAAGGAGCCACCCCACAAGACTTAAATACTATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGTTACATCCAGTACAGGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCGGGAGCTAC
+TAGTAACCTTCAGGAACAAATAGGATGGATGACC---------AGCAACC
+CACCTATT---CCAGTGGGAGAAATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGATAGGTTCTTTAAAA
+CTCTCAGAGCTGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAC
+ACCTTGCTGGTC---CAAAATGCAAACCCAGATTGTAAATCCATCTTGAG
+GGCATTAGGACCAGGGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCGGCCATAAAGCAAGGGTTTTAGCTGAAGCAATG---
+---------AGCCAAGTACAA------------------------AATAC
+AAACATAATGATACAGAGA---GGCAAT---TTTAAGGGT---CAAAAA-
+--AGA---ATTAAGTGTTTCAACTGTGGCAAGGAAGGACACCTAGCTAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGCTGGAAATGTGGGAAGGAAGG
+ACATCAAATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAA---------GGG---
+AGGCCAGGGAATTTTCCTCAGAGC--------------------------
+----AGGACA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAATG------------
+------GAG---------------GAAGAGATA---ACCTCCTCG-----
+-------------------CTGAAGCAGGAG-------------------
+-----------AACAGG---------------------GAGCCGTCC---
+---ACCCCT---GCA---ATTTCCCTCAAATCACTCTTTGGCAACGACCT
+CTTGTCACAGTGA
+>Ref.A2.CM.01.01CM_1445MV.GU201516
+------GCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAATATAGAATGAAACATT
+TAGTATGGGCAAGCAGGGAGCTGGAAAAATACTCAATCAACCCTGGTCTT
+TTAGAAACATCGGAAGGATGTAAACAAATAATAAGGCAGTTACATTCAGC
+TCTCCCAGTAGGAACAGAAGAACTTAAATCACTATATAATACAATAGCAG
+TCCTCTACTATGTACATCAAAAAATAGAGGTAAAAGACACCAAGGAAGCC
+CTAGATAAATTAGAGGAGGAGCAA------------AAC-----------
+----------AAA---------------TACAAG------CAGAAG----
+-----------------------------ACACAGCAGGCAGCAGCT---
+------------------------------GCCACAGGA-----------
+--------------------------------------------------
+-----------------------------AATAGCAGC------------
+------------AGTCAGAATTATCCCATAGTGCAAAATGCACAAGGGCA
+AATGGTGCACCAGGCCATATCGCCTAGGACTTTGAATGCATGGGTCAAAG
+TAGTAGAA---GAAAAAGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACTATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAGGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGGTACATCCAGTACATGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGATGGATGACC---------AGCAACC
+CACCTATC---CCAGTGGGAGAAATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCCAAAGAACCCTTTAGAGACTATGTAGATAGGTTCTTTAAAA
+CTCTCAGAGCTGAACAAGCTACACAGGATGTAAAAAATTGGATGACAGAC
+ACCTTGCTGGTC---CAAAATGCGAACCCAGATTGTAAAACTATCCTGAG
+AGCGTTAGGACCAGCGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCGGCCATAAAGCAAGGGTCTTGGCTGAAGCAATG---
+---------AGCCAAATACACAGTACA------------------AATCA
+AAATGTAATGATGCAGAGA---GGCAAT---TTTAGAGGT---CCAAAA-
+--AGA---ATTAAGTGTTTCAACTGTGGCAAGGAAGGACACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGTTGCTGGAAATGTGGGAAGGAAGG
+ACATCAAATGAAAGATTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATCTGG---CCT---CCCAACAAA---------GGG---
+AGGCCAGGAAACTTTCCCCAGAGC--------------------------
+----AGAACA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCA---GAGAACGTT
+GGAATGGGG---------------GAAGAGATA---GCTTCCTCG-----
+-------------------TCGAAGCAGGAACTG----------------
+--------AGAAACAGG---------------------GAACAACAC---
+---ACTCCT---ACA---ATTTCCCTCAGATCACTCTTTGGCAACGACCC
+CTTGTCACAGTAA
+>Ref.A2.CY.94.94CY017_41.AF286237
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCTTGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAACATT
+TGGTATGGGCAAGCAGGGAGCTGGAGAAATTCTCAATTAACCCTGGCCTT
+TTAGAAACACCAGAGGGATGTAGACAAATAATAAGGCAGTTACAACCAGC
+TCTCCAAACAGGAACAGAAGAACTTAAATCATTATATAATACAGTAGTAG
+TCCTCTACTGGGTACATCAAAGGGTAGATGTAAAAGACACCAAGGAAGCT
+CTAGATAAAATAGAGGAAGAACAA------------AAC-----------
+----------AAG---------------------------CAGAAA----
+-----------------------------ACACAGCATGCAGCAGCT---
+------------------------------GACACAGGG-----------
+--------------------------------------------------
+-----------------------------AACAGCAGC------------
+------------AGTCAAAATTATCCCATAGTGCAAAATGCACAAGGGCA
+AATGGTACACCAGGCTATATCACCTAGGACGTTGAATGCCTGGGTCAAAG
+TAGTAGAA---GAAAAGGCTTTCAGCCCAGAAGTAATACCTATGTTTACA
+GCATTATCAGAAGGAGCCACCCCACAAGACTTAAATACTATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGGTACATCCAGTACATGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGTTGGATGACC---------AGCGATC
+CACCCATC---CCAGTGGGAGAAATTTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCCACACAGGAGGTAAAAAACTGGATGACGGAC
+ACCTTGCTGGTC---CAAAATGCGAACCCAGATTGTAGATCCATCTTGAG
+AGCATTAGGACCAGGGGCCTCATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCAGCCATAAAGCAAGGGTTTTGGCTGAAGCAATG---
+---------AGCCATGTACAAAGTACA------------------AATAC
+AAACATAATGATGCAGAGA---GGCAAT---TTTAGGGGT---CAAAAA-
+--AGA---ATTAAGTGTTTCAACTGTGGCAAGGAAGGACACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGCTGGAAATGTGGAAAGGAAGG
+ACATCAAATGAAAGATTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAA---------GGG---
+AGGCCAGGAAATTTTCCTCAGAGC--------------------------
+----AGAACA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAACTTG
+AGAATGGGG---------------GAAGAGATA---ACCTCCTCC-----
+-------------------CTGAAGCAGGAACTG----------------
+--------GAGACCAGG---------------------GAACCATAC---
+---AATCCT---GCA---ATTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTTACAGTAA
+>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGA
+AAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATA
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTG
+TTAGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCATC
+CCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAA
+CCCTCTATTGTGTGCATCAAAGGATAGAGATAAAAGACACCAAGGAAGCT
+TTAGACAAGATAGAGGAAGAGCAA------------AAC-----------
+----------AAA---------------AGTAAG------AAAAAA----
+-----------------------------GCACAGCAAGCAGCAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------CACAGCAATCAG---------
+---------GTCAGCCAAAATTACCCTATAGTGCAGAACATCCAGGGGCA
+AATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATG
+AGGAAGCTGCAGAATGGGATAGAGTGCATCCAGTGCATGCAGGGCCTATT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGATGGATGACA---------AATAATC
+CACCTATC---CCAGTAGGAGAAATTTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAG
+ACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACTATTTTAAA
+AGCATTGGGACCAGCGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTAGGAGGACCCGGCCATAAGGCAAGAGTTTTGGCTGAAGCAATG---
+---------AGCCAAGTAACAAAT---------------------TCAGC
+TACCATAATGATGCAGAGA---GGCAAT---TTTAGGAAC---CAAAGA-
+--AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACACAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGATTGTACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAGATCTGG---CCT---TCCTACAAG---------GGA---
+AGGCCAGGGAATTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGAA---GAGAGCTTC
+AGGTCTGGG---------------GTAGAGACAACAACTCCCCCT-----
+-------------------CAGAAGCAGGAGCCG----------------
+--------ATAGACAAG---------------------GAACTGTAT---
+------CCT---TTA---ACTTCCCTCAGGTCACTCTTTGGCAACGACCC
+CTCGTCACAATAA
+>Ref.B.NL.00.671_00T36.AY423387
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGA
+AAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAATTAAAACATA
+TAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTT
+TTAGAGACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAGC
+CCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAA
+CCCTCTATTGTGTGCATGCAAGGATAGAGGTAAAAGACACCAAGGAAGCT
+TTAGAAAAAATAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTAAG------AAACGG----
+--------------GCACAGCAA------GCACAGCAAGCAGAAGCT---
+------------------------------GACGCAGGA-----------
+--------------------------------------------------
+-----------------------------AAAAACAACCCG---------
+---------GTCAGCCAGAATTACCCTATAGTGCAGAATCTCCAAGGGCA
+AATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAGAAGGCCTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATG
+AGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCCGGGCCTATT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGATGGATGACA---------AATAATC
+CACCTATC---CCAGTAGGAGAAATATATAAGAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTACCAGCATTCTGGACATAAA
+ACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAA
+CTTTAAGAGCTGAGCAAGCCTCACAGGAAGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATTCGAACCCAGATTGTAAAACTATTTTAAA
+AGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACCGCATGTCAGG
+GAGTAGGGGGACCCGGCCATAAAGCGAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACAAGT---------------------GCACC
+TGCCATAATGATGCAGAGA---GGCAAT---CATAGAAAC---CAAAGA-
+--AGGACTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCAGA
+AATTGCAGGGCCCCTAGNAAAAAGGGCTGTTGGAAATGTGNAAAGNAAGG
+ACACCAAATGAAAGATTGTACT---------NAG---AGACAGGCTANTT
+TTTTAGGGAAGATTTGG---CCT---TCCCACAAG---------GGG---
+AGGCCAGGGAATTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------CCTTCT
+CAGAGCAGACCAGAGCCAAC---AGCC--CCACCAGAA---GAGAGCTTC
+AGGTTTGGG---------------GAAGAGACAACAACTCCCTCT-----
+-------------------CAGAGGCAGGAGCCA----------------
+--------ACAGACAAG---------------------GAACTGTAT---
+------CCT---TTA---GCTTCCCTCAAATCACTCTTTGGCAGCGACCC
+ATAGTCACAATAA
+>Ref.B.TH.90.BK132.AY173951
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGA
+GAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATA
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTA
+TTGGAAACATCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCAAG
+CCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAG
+TCCTCTATTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCT
+TTAGAGAAGATAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTAAG------AAAAAG----
+-----------------------------GCACAGCAAGCAGCAGCT---
+------------------------------AACACAGAA-----------
+--------------------------------------------------
+-----------------------------AACAGCAGCCAG---------
+---------GTTAGCCAAAATTACCCTATAGTGCAAAATATGCAGGGGCA
+AATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATG
+AGGAAGCTGCAGAATGGGATAGATTGCATCCAGTGCATGCAGGGCCTATT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGATGGATGACA---------CATAATC
+CACCTATC---CCAGTGGGAGAAATTTACAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGGATGTATAGCCCTACCAGCATTTTGGACATAAG
+ACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAGA
+CTCTAAGAGCCGAGCAAGCCTCACAGGAGGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACTATTTTAAA
+AGCATTGGGACCAGCAGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGTCCCGGCCATAAGGCAAGAGTTTTGGCGGAAGCAATG---
+---------AGCCAAGTGACAAAT---------------------TCAGC
+TACCATAATGATGCAGAAA---GGCAAT---TTTAGGAAC---CAAAGA-
+--AAGATTGTTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCCCGA
+AATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAGATGTGGAAAGGAAGG
+ACACCAAATGAAAGATTGTACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATCTGG---CCT---TCCCACAAG---------GGA---
+AGGCCAGGGAATTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCGACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+AGGTTTGGG---------------GAAGAGACAACAACTCCCTCT-----
+-------------------CAGAAGCAGGAGACA----------------
+--------ATAGACAAG---------------------GAACTATAT---
+------CCT---TTA---ACTGCCCTCAAATCACTCTTTGGCAACGACCC
+CTCGTCACAATAA
+>Ref.B.US.98.1058_11.AY331295
+------------GCGTCAGTATTAAGCGGGGGAAAATTAGATACATGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATA
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTAACCCTGGCCTG
+TTAGAAACAGCAGAAGGCTGTAGACAATTATTGGGACAGCTACAGCCATC
+CCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAA
+CCCTCTATTGTGTACATCAAAGGATAGAGGTAAGAGACACCAAAGAGGCT
+TTAGACAAGATAGAGGAAGAGCAA------------AAC-----------
+----------AAA---------------AGTAAG------AAAAAA----
+-----------------------------GCACAGCAAGCAGCAGCT---
+------------------------GCAGCTGACACAGGA-----------
+--------------------------------------------------
+-----------------------------AACAGCAGCCAG---------
+---------GTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAAGGGCA
+AATGGTACATCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTGCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAAACCATCAATG
+AGGAAGCTGCAGAATGGGATAGAATACATCCAGCGCAAGCAGGGCCTATA
+GCACCAGGCCAGATAAGAGACCCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAACATGGATGACA---------AATAATC
+CACCTATC---CCAGTAGGAGAAATCTATAAAAAATGGATAATCATGGGA
+TTAAATAAAATAGTAAGGATGTATAGTCCTACCAGCATTCTGGACATAAG
+ACAAGGACCAAAGGAACCCTTTAGGGACTATGTAGACCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTTCACAGGAGGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAACGCGAACCCAGATTGTAAGACTATATTAAA
+AGCATTAGGACCAGCAGCTACACTAGAGGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCTGAAGCAATG---
+---------AGCCAAGTAACAAAT---------------------TCAGG
+TGCCATAATGATGCAGAAA---GGCAAT---TTTAGGAAC---CAG----
+-----GTTGTTAGGTGTTTCAATTGTGGCAAAGTAGGGCACATAGCCAAA
+AATTGCAGGGCCCCTAGGAAGAAGGGCTGTTGGAAATGTGGAAAAGAAGG
+ACACCAAATGAAAGATTGTGAT---------CAG---AGACAGGCTAATT
+TTTTAGGGAAGATCTGG---CCT---TCCCACAAA---------GGA---
+AGGCCAGGGAATTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+AGGTTTGGG---------------GAGGAGACAACAACTCCCTCT-----
+-------------------CAGAAGCAGGAGCCA----------------
+--------CAAGAA------------------------TAT---------
+------CCT---TTA---GCTTCCCTCAGATCACTCTTTGGCAACGACCC
+CTCGTCACAATAA
+>Ref.C.BR.92.BR025_d.U52953
+ATGGGTGCGAGAGCGTCAATATTAAGAGGCGGAAAATTAGATGCTTGGGA
+AAGAATTAAGTTAAAGCCAGGGGGAAAGAAACACTATATGATGAAACACC
+TAGTCTGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTGACCCTGGCCTT
+TTAGAGACATCCGAAGGCTGTAAACAAATAATGAAACAGCTACAACCAGC
+TCTTCAGACAGGAACAAAGGAACTTATATCATTACATAATACGGTTGCAA
+CTCTCTATTGTGTACATGAAAAGATAGATGTACGAGACACCAAGGAAGCC
+TTAGACAAAATAAAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTCAG------CAAAAA----
+-----------------------------ACACAGCAGGCAGAAGCG---
+---------------------------GCTGACAAAGGA-----------
+--------------------------------------------------
+--------------------------------------AAG---------
+---------GTCAGTCAAAATTATCCTATAGTACAGAATCTCCAAGGGCA
+AATGGTACACCAGCCCATATCAGCTAGAACTTTGAATGCGTGGGTAAAGG
+TAGTAGAG---GAGAAGGCTTTCAGCCCAGAGGTAATACCCATGTTTACA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGTTAAATAC
+AGTGGGGGGACACCAAGCAGCCATGCAAATGTTAAAAGATACCATCAATG
+AGGAGGCTGCAGAATGGGATAGATTACATCCAGTGCATGCAGGGCCTGTC
+GCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+CAGTACCCTTCAGGAACAAATAACATGGATGACA---------AATAACC
+CACCTGTC---CCAGTAGGAGACATCTATAAAAGATGGATAATTCTGGGG
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTTTAAAA
+CTCTAAGAGCAGAGCAAGCTACCCAAGATGTAAAAAATTGGATGACAGAT
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAG
+AGCATTAGGGCCAGGGGCTTCATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCACAAAGCAAGAGTGTTGGCTGAGGCAATG---
+---------AGCAAAGTAAAC------------------------AATAC
+AAACATAATGATGCAAAGA---AGCAAT---TGTAAAGGC---CCTAAA-
+--AGAACTATTAAATGCTTCAACTGTGGCAAGGAAGGGCACTTAGCCAGA
+AATTGCAGGGCTCCTAGGAAAAAAGGCTGTTGGAAATGTGGAAAAGAAGG
+ACACCAAGTGAAAGACTGTACT---------GAG---AGGCAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCCACAGG---------GGG---
+AGGCCAGGAAATCTTCTTCAGAAC--------------------------
+----AGAACA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGAA---GAGAGCTTC
+AGGTTTGGG---------------GAAGAGACAACAACTCCCTCT-----
+-------------------CGGAAGCAGGAGACG----------------
+--------ATAGACAAG---------------------GAACTG------
+------CCC---TTA---ACTTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTTGTCAACATAA
+>Ref.C.ET.86.ETH2220.U46016
+ATGGGTGCGAGAGCGTCAATATTAAGAGGCGAAAAATTAGATGCCTGGGA
+AAAAATTAAGTTAAGGCCAGGGGGAAAGAAACACTATATGCTGAAACACC
+TAGTCTGGGCAAACAGGGAGCTGGAAAAATTTGCACTTAACCCTGACCTT
+TTAGATACATCAGCAGGCTGTAAACAAATAATTAAACAGCTACAACCAGC
+TCTTCAGACAGGAACAGAGGAACTTAAATCATTATTTAATACAGTGGCAA
+CTCTCTATTGTGTACATCAAAAGATAGAGATAAAAGACACCAAGGAAGCC
+TTAGACAAGATAGAGGAAGAACAA------------AAC-----------
+----------GAA---------------AGTCAG------CAAAAA----
+-----------------------------ACACAGCAGGCAGGAGCA---
+---------------------------GCTGACAGAGGA-----------
+--------------------------------------------------
+--------------------------------------AAG---------
+---------GACAGTCAAAATTATCCTATAGTGCAGAATATGCAGGGGCA
+AATGGTACATCAGCCCATATCAGCTAGAACTTTGAATGCATGGGTAAAAG
+TAGTAGAG---GAAAAGGCTTTCAGCCCAGAGGTAATACCCATGTTTACA
+GCTTTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATG
+AGGAGGCTGCAGAATGGGACAGGTTACATCCAGTGCATGCAGGGCCTGTT
+GCACCAGGCCAAATGAGAGACCCAAGGGGAAGTGACATAGCAGGAACAAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------GGGAACC
+CACCTGTT---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGG
+CTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTTTAAAA
+CCTTAAGAGCTGAACAAGCTACACAAGATGTAAAAAATTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAAACCATTTTAAG
+AGCATTAGGGCCAGGGGCTTCATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGCCCACAAAGCAAGAGTGTTGGCTGAGGCAATG---
+---------AGCCAAGTAAAC------------------------AATAC
+AACCATAATGATGCAGAAA---AGCAAT---TTTAAGGGC---CCTAAA-
+--AGAGCAATTAAATGTTTCAACTGTGGCAAGGAAGGGCACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAAGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGTACC---------GAG---AGACAGGCTAATT
+TTTTAGGGAGACTTTGG---CCT---TCCAACAAG---------GGA---
+AGGCCAGGGAATTTCCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC---CCACCAGAGAGTCT-----
+-----CAGACCAGAGCCAAC---AGCC--CCACCACCA---GAGAGCTTC
+AGGTTCGAG---------------GAA---GCA---ACACCTTCT-----
+-------------------CCGAAGCAGGAGCTG----------------
+--------AAAGACAGG---------------------GAA---------
+------GCC---TTA---ACTTCCCTCAAATCACTCTTTGGCAACGACCA
+CTTGTTACAATAA
+>Ref.C.IN.95.95IN21068.AF067155
+ATGGGTGCGAGAGCGTCAATATTAAGAGGGGGAAAATTAGATAAATGGGA
+AAAAATTAGGTTAAGGCCAGGGGGAAAGAAACGCTATATGCTAAAACACC
+TAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCAGTTAACCCTGGCCTT
+TTAGAGACAGCAGAAGGCTGTAAACAAATAATAAAACAGCTACAACCAGC
+TCTTCAGACAGGAACAGAGGAACTTAGATCATTATTCAACACAGTAGCAA
+CTCTCTATTGTGTACATGCAGGGATAGAAGTACGAGACACCAAAGAAGCC
+TTAGACAAGATAGAAGAAGAACAA------------AAC-----------
+----------AAA---------------ATTAAG------CAAAAA----
+-----------------------------ACACAGCAGGCAAAAGAG---
+---------------------------GATGAC---GGG-----------
+--------------------------------------------------
+--------------------------------------AAG---------
+---------GTCAGTCAAAATTATCCTATAGTGCAGAATCTCCAAGGGCA
+AATGGTACACCAAGCCATATCACCTAGAACTTTGAATGCATGGGTAAAAG
+TAATAGAG---GAAAAGGCTTTTAGCCCAGAGGTAATACCCATGTTTACA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGTTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACCATCAATG
+AAGAGGCTGCAGAATGGGATAGATTACATCCAGTACCTGCAGGGCCTATT
+GCACCAGGCCAACTGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACG---------AATAACC
+CACCTGTT---CCAGTGGGAGACATCTATAAAAGATGGATAATTCTGGGG
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTGAGCATTTTGGACATAAG
+ACAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGACCGGTTCTTTAAAA
+CTTTAAGAGCTGAACAAGCTACACAAGATGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCGAATCCAGATTGTAAGACCATTTTAAG
+AGCATTAGGACCAGGGGCTTCATTAGAAGAGATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTAGCCACAAAGCAAGAGTGTTGGCTGAGGCAATG---
+---------AGCCAAACAAAC---------------------------AG
+TGCCATACTGATGCAAAGA---AGCAAT---TTTAAAGGC---TCTAAA-
+--AGAATTGTTAAATGTTTCAACTGTGGCAAGGAAGGGCACCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAAGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGTACT---------GAA---AGGCAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCCACAAG---------GGG---
+AGGCCAGGGAATTTCCTCCAGAGT--------------------------
+----AGACCA------GAGCCAACAGCT----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+AGGTTCGAG---------------GAG---ACA---ACCCCAGCT-----
+-------------------CCGAAGCAGGAGCCG----------------
+--------AAAGACAGG---------------------GAA---------
+------CCT---TTA---ACTTCCCTCAGATCACTCTTTGGCAGCGACCC
+CTTGTCTCAATAA
+>Ref.C.ZA.04.04ZASK146.AY772699
+ATGGGTGCGAGAGCGTCAGTATTAAGAGGCGAAAAATTAGATACATGGGA
+AAAAATTAGGTTAAGGCCAGGGGGAAAGAAACACTATATGCTAAAACACA
+TAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTCAACCCTGGCCTT
+TTAGAAACATCAGAAGGCTGTAAACAAATATTGGCACAAATACAACCAGC
+TATTCAGACAGGAACAGAGGAACTTAAATCATTATTCAACACAATAGCAG
+TTCTCTATTGTGTACATAAAAAGATAGATGTAAGAGACACCAAGGAAGCC
+TTAGACAAGATAGAGGAAGAGCAA------------AAC-----------
+----------AAA---------------AGTCAG------CAAAAA----
+-----------------------------ACACAGCAGGCAAAAGCG---
+---------------------------GCTGAC---GAA-----------
+--------------------------------------------------
+--------------------------------------AAG---------
+---------GTCAGTCAAAATTTTCCTATAGTACAGAATCTTCAAGGGCA
+AATGGTACATCAACCCCTATCACCTAGAACCTTGAATGCATGGGTAAAAG
+TAATAGAG---GAGAAGGGTTTTAACCCAGAGGTAATACCCATGTTTACA
+GCATTATCAGAGGGAGCCACCCCACAAGATTTGAACACCATGCTAAATAC
+GGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGATACCATCAATG
+AAGAGGCTGCAGAATGGGATAGATTACATCCAGTACATGCAGGGCCTGTC
+GCACCAGGCCAAATGAGAGAACCAAGGGGAAGTGACATAGCAGGAACTAC
+TAGTAACCTTCAGGAACAAGTAGCATGGATGACA---------AGTAACC
+CACCTATT---CCAGTGGGAGACATCTATAAAAGATGGATAATTCTGGGA
+TTAAATAAAATAGTGAGGATGTATAGCCCGGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTTTAAAA
+CTTTAAGAGCTGAACAAGCTACACAAGAGGTAAAAAATTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAG
+AGCATTAGGACCAGGGGCTACATTAGAAGAAATGATGGCAGCATGTCAAG
+GGGTGGGAGGACCTGGCCACAAGGCAAGAGTGTTGGCTGAGGCAATG---
+---------AGCCAAATAAAC------------------------AATGG
+AAACATAATGATGCAGAGA---AGTAAT---TTTAAAGGC---CCTAAA-
+--AGAATTGTTAAATGTTTTAACTGTGGCAAGGGGAGGCACATAGCCAAA
+AATTGCAGGGCCCCTAGGAAAAAAGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGTACT---------GAA---AGGCAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCCAGAAG---------GGG---
+AGGCCAGGGAATTTTCTCCAGAAC--------------------------
+----AGACTA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+AGGTTCGAG---------------GAG---ACG---ACCCCTGCT-----
+-------------------CCGAAACAGGAGCTG----------------
+--------AAAGACAGG---------------------GAA---------
+------CCT---TTA---ACTTCCCTCAGATCACTCTTTGGCAGCGACCC
+CTTGTCTCAATAA
+>Ref.D.CD.83.ELI.K03454
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATAAATGGGA
+AAAAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAGACTAAAACATA
+TAGTATGGGCAAGCAGGGAGCTAGAACGATATGCACTTAATCCTGGCCTT
+TTAGAAACATCAGAAGGCTGTAAACAAATAATAGGGCAGCTACAACCAGC
+TATTCAGACAGGAACAGAAGAACTTAGATCATTATATAATACAGTAGCAA
+CCCTCTATTGTGTACATAAAGGAATAGATGTAAAAGACACCAAGGAAGCT
+TTAGAAAAGATGGAGGAAGAGCAA------------AAC-----------
+----------AAA---------------AGTAAG------AAAAAG----
+-----------------------------GCACAGCAAGCAGCAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------AACAACAGCCAG---------
+---------GTCAGCCAAAATTATCCTATAGTGCAGAACCTACAGGGGCA
+AATGGTACATCAGGCCATATCACCTAGAACTTTGAACGCATGGGTAAAAG
+TAATAGAA---GAAAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGCTAAAAGAGACCATCAATG
+AAGAAGCTGCAGAATGGGATAGGTTACATCCAGTGCATGCAGGGCCTATT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------AGTAACC
+CACCTATC---CCAGTAGGAGAAATCTATAAAAGATGGATAATTGTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAGGGACCAAAGGAACCTTTTAGAGACTATGTAGACCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTTCACAGGATGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCAAACCCAGATTGCAAGACTATCTTAAA
+AGCATTGGGACCACAGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGGGGGCCCAGCCATAAAGCAAGAGTTCTGGCTGAGGCAATG---
+---------AGCCAAGCAACAAATTCA------------------GTTAC
+TACAGCAATGATGCAGAGA---GGCAAT---TTTAAGGGC---CCAAGA-
+--AAAATTATTAAGTGTTTCAATTGTGGCAAAGAAGGGCACATAGCAAAA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAGATGTGGAAAGGAAGG
+ACACCAACTAAAAGATTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAGAATTTGG---CCT---TCCCACAAG---------GGA---
+AGGCCGGGGAACTTTCTCCAAAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTTGGG---------------GAAGAGATA---ACCCCCTCT-----
+-------------------CAAAAACAGGAGCAG----------------
+--------AAAGACAAG---------------------GAACTGTAT---
+------CCT---TTA---ACTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTCGCAATAA
+>Ref.D.CM.01.01CM_4412HAL.AY371157
+------GCGAGAGCGTCAATATTAAGCGGGGGAAAATTGGATGCATGGGA
+AAAAATTCGGTTACGGCCAGGGGGAAGCAAAAAGTATAGGCTAAAACATC
+TAATATGGGCAAGCAATGAGCTAGAACGATTTGCACTTAATCCTGGCCTT
+TTAGAGACATCAGATGGCTGTAAACAAATACTAGGCCAGCTACAACCAGC
+TCTTAAAACAGGAACAGAAGAACTTAGATCATTATTTAATGCAGTAGCAG
+TACTCTATTGTGTACATGAAAGGATAGAGGTAAAGGACACCAAGGAAGCC
+TTAGACAAGATAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTAAG------AAAAAA----
+-----------------------------GCACAGCAAGCAGCAGCT---
+------------------------------GACACAGGG-----------
+--------------------------------------------------
+-----------------------------GACAACAAACAG---------
+---------GTCAGCCAAAATTATCCTATAGTGCAGAACTTACAGGGGCA
+AATGGTACACCAAGCCCTATCACCCAGAACCTTGAACGCATGGGTAAAAG
+TAATAGAG---GAAAAGGCTTTCAACCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAGGATTTAAATACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGAGACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGCTACATCCAGTGCAAGCAGGGCCTGTT
+GCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGATATTGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGGGTGGATGACA---------AGTAATC
+CACCAATC---CCAGTAGGGGAAATCTATAAAAGATGGATAATCTTGGGA
+TTGAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTTTGGATATAAG
+ACAAGGACCAAAAGAACCCTTTAGAGACTATGTAGACCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAA
+ACCTTGTTGGTT---CAAAATGCAAACCCAGACTGTAAAACTATCTTAAA
+GGCGTTGGGGCCAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGGCCCGGCCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGCAACAGCAGGT------------ATG---AATGC
+TGCAATAATGATGCAGAGG---GGCAAT---TTTAAGGGC---CCAAAG-
+--AGAATTGTTAAGTGTTTCAACTGTGGCAAAGAAGGGCACATAGCAAAA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGAGAAGG
+ACACCAAATGAAAGATTGCACA---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGA---
+AGGCCAGGGAACTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGG---------------GAGGAGATA---GCCCCCTCT-----
+-------------------CAGAAACAGGAGCAG----------------
+--------AAAGACAAAGACCAG---------------GAACTGTAT---
+------CCT---TTA---ACTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTCACAATAA
+>Ref.D.TZ.01.A280.AY253311
+------GCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATGCATGGGA
+AAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAACTAAAACATA
+TAGTATGGGCAAGCAGGGAGTTAGAACGATTTGCACTTAATCCTGGCCTT
+TTAGAGACATCCGAAGGCTGTAAACAAAT----------CTACAACCAGC
+TATTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAA
+CCCTCTATTGTGTGCATAGAAAGATAGAGGTAAAAGACACCAAGGAAGCT
+TTAGAAAAATTAGAGGAAGAGCAA------------ACC-----------
+----------AAA---------------AGTAAG------AAAAAG----
+-----------------------------GCACAGCAAGCAACAGCT---
+------------------------------GACACAGGA-----------
+--------------------------------------------------
+-----------------------------AGCAGCAGCCAG---------
+---------GTCAGCCAAAATTATCCTATAGTGCAAAACCTACAGGGGCA
+AATGGTACACCAGGCCATATCACCTAGAACCTTGAACGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTCAGCCCAGAAGTGATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACAATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCTATGCAAATGTTAAAAGAGACCATCAATG
+AGGAAGCTGCAGAATGGGATAGGCTACATCCAGTGCATGCAGGGCCTATT
+GCACCAGGACAGATGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------AATAATC
+CACCTGTC---CCAGTAGGAGAAATATATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTTAGCATTTTGGACATAAG
+ACAAGGACCAAAGGAGCCCTTTAGGGACTATGTAGATCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTTCACAAGATGTAAAAAATTGGATGACTGAA
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAAACTATCTTAAA
+AGCATTGGGACCAGCGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGGGGACCCAGTCATAAGGCAAGAGTTCTAGCTGAGGCAATG---
+---------AGCCAAGCAACAAATGTA------------------AATGC
+TGCCATAATGATGCAGAGA---GGTAAT---TTTAAGGGC---CCAAGG-
+--AAAATCATTAAGTGTTTCAACTGTGGCAAAGAAGGACACATAGCAAAA
+AATTGCAGGGCCCCAAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGATTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCCACAAA---------GGA---
+AGGCCAGGGAACTTCCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGATCTTC
+GGGTTTGGG---------------GAGGAGATA---AAACCCTCT-----
+-------------------CAGAAACAGGAGCAG----------------
+--------AAAGACAAG------------GACAAG---GAACTGTAT---
+------CCT---TCA---GCTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTCACAATAA
+>Ref.D.UG.94.94UG114.U88824
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGAGGAAAATTAGATGAATGGGA
+AAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGACTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCACTTAATCCTGGCCTT
+TTAGAAACATCAGAAGGCTGTAGACAAATAATAAGACAGCTACAACCATC
+TATTCAGACAGGATCAGAGGAAATTAAATCATTATATAATACAGTGGTAA
+CCCTCTATTGTGTACATGAGAGGATAAAGGTAGCAAGCACCAAGGAAGCT
+TTAGACAAGATAGAGGAAGAACAA------------GCC-----------
+----------AAA---------------AGTAAG------AAAAAA----
+-----------------------------GCACAGCAAGCAACAGCT---
+------------------------------GACACAAGA-----------
+--------------------------------------------------
+-----------------------------AACAGCAGCCAG---------
+---------GTCAGCCAAAATTATCCTATAGTGCAAAACCTACAGGGGCA
+AATGGTACACCATCCCCTATCACCTAGAACTTTGAACGCATGGGTAAAAG
+TAATAGAG---GAGAAGGCTTTCAACCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAGGATACCATCAATG
+AGGAAGCTGCAGAATGGGATAGGCTACATCCAGTGCATGCAGGGCCTGTT
+GCACCAGGCCAATTGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTAACCTTCAGGAACAAATAGGATGGATGACA---------AGCAATC
+CACCTATC---CCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGACCAAAGGAACCCTTTAGAGACTATGTAGATCGGTTCTATAAAA
+CTCTAAGAGCCGAGCAAGCTTCACAGGATGTAAAAAATTGGATGACTGAA
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAAACTATCTTAAA
+AGCATTGGGACCAGCGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGGGGACCCAGTCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGCAACAAATGCA------------------AATAC
+TGCTATAATGATGCAGAGA---GGCAAT---TTTAAGGGC---CCAAAG-
+--AAAATCATTAAGTGTTTCAACTGTGGCAAAGAAGGGCACACGGCAAAA
+AATTGCAGGGCTCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGG
+ACACCAAATGAAAGATTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCCACAAT---------GGA---
+AGGCCAGGGAATTTCCTTCAGAGC--------------------------
+----AGACCCCCAGCAGAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGATCTTC
+GGATTAGGG---------------GAGGAGATA---ACACCTCCT-----
+-------------------CAGAAACAGGAGCAG----------------
+--------AAAGACAAG---------------------GAACTGTAT---
+------CCT---TTA---ACCTCCCTCAAATCACTCTTTGGCAACGACCC
+GTTGTCACAGTAA
+>Ref.F1.BE.93.VI850.AF077336
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGA
+AAAAATTCAGTTAAGGCCGGGGGGAAAGAAAAGATATAAAATGAAACATC
+TAATATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTGATCCTGGCCTT
+CTAGAAACATCAGAAGGCTGTCAAAAAATAATAAGACAGCTACAACCATC
+CCTTCAGACAGGATCAGAAGAGCTTAAGTCATTATTTAATACAGTAGCAG
+TCCTCTATTATGTACATCAAAGGGCAGGGGTAACAGACACCAAGGAAGCT
+TTAGACAAGCTAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTCAG------CAAAAG----
+-----------------------------ACACAGCAAGCGGCAGCT---
+------------------------------GACAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGACA
+AATGGTACACCAGTCTCTATCACCTAGAACTTTAAATGCATGGGTAAAGG
+TGATAGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACTCCCACAGATTTAAACACCATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATG
+AGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGGCCTGCC
+CCACCAGGCCAGATGAGGGAACCTAGGGGGAGTGATATAGCTGGAACTAC
+TAGTACCCTTCAGGAACAAATACAATGGATGACG---------GGCAACC
+CACCTGTC---CCAGTGGGAGACATCTATAAAAGATGGATCATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGTCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGATTCTTTAAAG
+TCCTAAGAGCTGAGCAAGCTTCACAGGACGTAAAGGGTTGGATGACAGAC
+ACATTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAA
+AGCATTGGGAACAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCCGAGGCAATG---
+---------AGTCAAGCAAAT---------------------------TC
+AGCCATAATGATGCAGAAA---AGTAAT---TTTAAGGGC---CAAAGA-
+--AGAGTTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAGAGAAGG
+ACACCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCCGGAAATTTCCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCAGA---------------GAGGAGATA---ACCCCCTCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACGGG---------------------GAACTGTAC---
+---CCTCCC---TTA---GCTTCCCTCAAATCACTCTTTGGCAACGACCC
+TTAGTCACAATAA
+>Ref.F1.BR.93.93BR020_1.AF005494
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGA
+AAAAATTCGGTTAAGGCCGGGGGGAAAGAAAAAATATAGACTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTGATCCAGGCCTT
+CTAGAAACATCAGAAGGCTGTCGAAAAATAATAGGACAGTTACAACCATC
+CCTTCAGACAGGATCAGAAGAGCTCAAATCATTATATAATACAATAGCAG
+TCCTCTATTATGTACATCAAAAGGTAGAGGTAAAAGACACCAAGGAGGCT
+TTAGAGAAGCTAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------GGTCGG------CAAAAG----
+-----------------------------ACACAGCAAGCGACTGCT---
+------------------------------GAAAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGACA
+AATGGTACACCAGTCTTTATCACCTAGAACTTTAAATGCATGGGTAAAGG
+TGATAGAA---GAGAAGGCTTTTAGTCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGTTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATG
+AGGAGGCTGCAGAATGGGACAGATTACATCCAACACAGGCAGGACCCATC
+CCCCCAGGTCAGATAAGGGAACCTAGGGGAAGTGATATAGCTGGAACTAC
+TAGTACCCTTCAGGAACAAATACAATGGATGACA---------GGCAACC
+CACCTGTC---CCAGTGGGAGAAATGTATAAAAGATGGATCATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCGGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CCCTAAGAGCTGAGCAAGCTACACAGGAAGTAAAGGGTTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAA
+AGCATTGGGACCAGGGGCTACACTAGAGGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTAGCCATAAGGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGCAACA------------------------AATAC
+AGCTATAATGATGCAGAAA---AGTAAC---TTTAAGGGC---CAAAGA-
+--AGAATTGTTAAATGCTTTAATTGTGGCAAAGAAGGACACATAGCCAAA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAGTGTGGAAGAGAGGG
+ACACCAAATGAAGGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCCGGAAACTTCATCCAGAAC--------------------------
+----AGGCCA------GAGCCGTCAGCC----------------------
+-----------------------------CCGCCAGCA---GAGAGCTTC
+AGGTTCGGG---------------GAGGAGACA---ACCCCATCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACGAG---------------------GGACTGTAC---
+---CCTCCC---TTA---GCTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTAGTCACAATAA
+>Ref.F1.FI.93.FIN9363.AF075703
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAACTAGATGCATGGGA
+AAAAATTCGGTTAAGGCCGGGGGGAAAGAAACAATATAGAATAAAACATC
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCAATAGATCCTGGCCTT
+CTAGAAACATCAGAAGGCTGTCAAAAAATAATAGCACAGATACAGCCATC
+CATTCAGACAGGATCAGAAGAGCTTAGATCATTATATAACACAATAGCAG
+TCCTCTATTTTGTACATCAAAAGATAGAGGTAAAGGACACCAAGGAAGCT
+TTAGATAAGCTAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTCAG------CAAAAG----
+-----------------------------ACACAGCAAGCGGCAGCT---
+------------------------GCAGCTGACAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGACA
+AATGGTACATCAGGCTATATCACCTAGAACTTTAAATGCATGGGTAAAGG
+TGATAGAA---GAGAAGGCTTTTAGCCCAGAAGTTATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACTCCACAAGATTTAAACACCATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATTAATG
+AGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCATGCAGGACCTATT
+CCACCAGGCCAGATGAGGGAACCTAGGGGAAGCGATATAGCTGGAACTAC
+TAGTACCCTTCAGGAACAAATACAATGGATGACA---------AGTAACC
+CACCTGTC---CCAGTGGGAGACATCTATAAAAGATGGATCATCCTAGGA
+TTAAATAAAATAGTAAGGATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAG
+CTCTAAGAGCTGAGCAAGCTACACAGGAAGTAAAGGGTTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAATCCAGATTGTAAGATCATTTTAAA
+AGGATTGGGAATAGGGGCTACACTAGAAGAAATGATGACAGCATGTCGGG
+GAGTGGGAGGACCTGGCCATAAGGCAAGAATTTTGGCTGAGGCAATG---
+---------AGCCAAGCAAAT---------------------------AC
+AACCATAATGATGCAGAAA---AGTAAT---TTTAGGGGC---CAAAGA-
+--AGAATTGTTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAGA
+AATTGCAGGGCCCCCAGGAAAAAGGGCTGTTGGAAATGTGGACAAGAAGG
+GCACCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCCGGAAATTTCCTTCAGAGT--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCA---GAGAGCCTC
+GGGATCAGA---------------GAAGAGGTA---ACTCCCTCT-----
+-------------------CCGAGGCAGGAGCAG----------------
+--------AAAGAAGAG---------------------GGACAGTAC---
+---CCTCCC---TTA---GCTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTAGTCACAATAA
+>Ref.F1.FR.96.96FR_MP411.AJ249238
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAGAATTCGATTAAGACCGGGGGGAAAGAAAAAATATAGAATGAAGCATC
+TAGTATGGGCAAGCAGGGAGTTAGAACGATTTGCAGTTGATCCTGGACTT
+CTAGAAACACCAGAAGGCTGTAAGCAAATAATAAGACAGCTACAACCATC
+CCTTCAGACAGGATCAGAAGAGCTTAGATCATTGTTCAATACAGTAGCAG
+TTCTCTATTGTGTACATCAAAAGATAGAGATAAAGGACACCAAGGAAGCT
+TTAGAGAAGTTAGAGGAGGAACAA------------AAC-----------
+----------AAA---------------GGTCAG------CAAAAG----
+-----------------------------ACACAGCAAGCGGCAGCT---
+------------------------------GACAAAGGA-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAAAATCTTCAGGGACA
+GATGGTACATCAGCCTATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TGATAGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTCTCA
+GCATTATCAGAAGGGGCCACCCCACAAGATTTAAACACCATGCTAAATAC
+AGTGGGTGGACATCAAGCAGCCATGCAAATGTTAAAAGACACCATCAATG
+AGGAAGCTGCAGAATGGGACAGATTACATCCAGCGCATGCAGGGCCTATC
+CTACCAGGCCAGATGAGAGAACCTAGGGGTAGTGACATAGCTGGAACTAC
+TAGTACCCTTCAGGAACAAATACAATGGATGACA---------AGCAACC
+CACCTGTC---CCAGTGGGAGACATCTATAAAAGATGGATCATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTTAGCATTTTGGACATAAG
+ACAAGGGCCAAAGGAACCTTTTAGAGACTATGTGGACAGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAGAATTGGATGACAGAA
+AGCTTGCTGGTG---CAAAATTCGAACCCAGACTGTAAGACCATCTTAAA
+AGCATTAGGACCAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCATAAGGCAAGGGTTTTGGCTGAGGCCATG---
+---------AGCCAAGCAACA------------------------AATGC
+AGCTATAATGATGCAGAAA---AGTAAC---TATAAGGGC---CCAAGA-
+--AGATTTATTAAATGTTTTAATTGTGGCAAAGAAGGACACATAGCCAAA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAAGAAGG
+ACATCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCCGGAAATTTTCTTCAGAAC--------------------------
+----AGGCCA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCG---GAGAGCTTC
+GGGTTCAAA---------------GAGGAAATA---ACCCCCTCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAGGACGAG------------GGACAG---GGACTGTAT---
+---CCTCCC---TTA---GCCTCCCTCAAATCACTTTTTGGCAGCGACCC
+TTAGTCACCATAA
+>Ref.F2.CM.02.02CM_0016BBY.AY371158
+------GCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGACTGGGA
+GAAAATTCGGTTAAGGCCGGGAGGGAAGAAAAAATATAGGCTAAAACATA
+TAGTATGGGCAAGCAAGGAGCTAGAACGATTTGCACTTAATCCTGGCCTT
+TTAGAGACAACAGAAGGCTGTAAACAAATAATAGGACAACTACAATCATC
+CCTTCAGACAGGATCAGAAGAGATTAAATCATTATATAACACAGTAGCAG
+TCCTCTATTATGTACATCAAAAGATACAAATAAGAGACACCAAGGAAGCT
+TTAGATAAGCTACAGGAAGAACAA------------GAC-----------
+----------AAA---------------TATCAG------CAAAAA----
+-----------------------------ACACAACCAGCAGCGGCT---
+------------------------------GATAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGGCA
+AATGGTACATCAGGCTATATCACCTAGAACTCTAAATGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTCAGCCCAGAAGTCATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTAGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGTTACATCCAGTGCAGGCAGGACCTATC
+CCACCAGGTCAGATAAGAGAACCTAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------AGCAACC
+CACCTGTC---CCAGTAGGAGAAATTTATAAAAGATGGATAATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCTACACAGGAAGTAAAAGGCTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAA
+AGCACTAGGACCAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCATAAAGCAAGAATTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACA------------------------GCTAC
+ATCCGTACTGATGCAGAAA---AGCAAC---TTTAAGGGC---CAAAAA-
+--AGAATTGTCAAGTGTTTCAACTGTGGCAAAGAAGGACATATAGCTAAA
+AATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGCACT---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAA---------GGG---
+AGGCCTGGGAATTTCATTCAGAGC--------------------------
+----AGACCA------GAGCCAACGGCC----------------------
+-----------------------------CCGCCAGCA---GAGGGCTTC
+GGGTTCGGA---------------GAAGAGATA---ACTCCCTCC-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACAAG---------------------GGACTGTAT---
+---CCTCCC---TTG---ACTTCCCTCAAATCACTCTTTGGCAACGACCC
+GTAGTCACAATAA
+>Ref.F2.CM.95.95CM_MP255.AJ249236
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAAAATTCGGTTAAAGCCGGGGGGAAAGAAAAGATATAGGCTAAAACATC
+TAGTATGGGCAAGCAGGGAACTAGAACGATTTGCACTTAATCCTAGCCTT
+TTAGAAACAACAGAAGGCTGTAAGAAAATAATAGGACAATTACAATCATC
+CCTTCAGACAGGATCAGAAGAGCTTAAATCACTATACAATGCAGTAGTAG
+TTCTCTATTATGTACATCAAAGGATAGATGTAAGAGACACCAAGGAAGCT
+TTAGATAAGCTACAGGAAGAACAA------------GAT-----------
+----------AAA---------------AGTCAG------CAAAAG----
+-----------------------------GAACAACAAAAGGCGGCT---
+------------------------------GACAAAGAG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTGCAGAATATTCAGGGGCA
+AATGGTACACCAGGCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTCAGTCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACCCCACAAGATTTAAATACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACTATCAATG
+AGGAGGCTGCAGAATGGGACAGGTTACATCCAGTGCATGCAGGGCCTATC
+CCACCAGGGCAGATGAGAGAACCTAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAATAACATGGATGACA---------GGCAACC
+CACCCGTC---CCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CTCTAAGAGCTGAACAAGCTACACAGGAGGTAAAAAACTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATTCGAACCCAGATTGTAAGACAATTTTAAA
+AGCATTGGGACCAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCATAAAGCAAGAATTCTGGCTGAGGCAATG---
+---------AGCAAAGCAACA------------------------AGTAC
+AGCCATAATGATGCAGAAA---AGCAAC---TTTAAGGGC---CAAAAA-
+--AGAATTGTTAAGTGTTTCAACTGTGGCAAAGAAGGACATATAGCTAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGCACT---------GAA---AGGCAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAGG---------GGG---
+AGGCCAGGAAATTTTCTTCAGAAC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCA---GAGAACTTC
+GGGTTCGGA---------------GAGGGGATA---ACCCCCTCC-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGGCGAG---------------------GAACAGGCT---
+---CCTCCC---TTA---GTTTCCCTCAAATCACTCTTTGGCAGCGACCC
+TTAGTCGCAATAA
+>Ref.F2.CM.95.95CM_MP257.AJ249237
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAAAATTCGGTTAAGGCCGGGGGGGAAGAAAAAATATAGGCTGAAACATA
+TAGTATGGGCAAGCAGGGAGCTAAAACGATTTGCACTTAATCCTGGCCTT
+TTAGAGACAACAGAAGGCTGTAAGAAAATAATAGGACAACTACAACCATC
+CCTTCAGACAGGGTCAGAGGAACTGAAATCATTATTTAACACAATAGTAG
+TTCTCTATTATGTACATCAAAAGATAGAGGTAAGAGACACCAAGGAAGCT
+TTAGATAAGCTACAGGAAGAACAA------------GAC-----------
+----------AAA---------------CATCAG------CAAAAA----
+-----------------------------ACACAACAAGCAACGGCT---
+------------------------------GACAAAGGG-----------
+--------------------------------------------------
+-----------------------------GTCAGTAAAGGG---------
+---------GTCAGTCAAAATTACCCTATACTACAAAATCTTCAGGGGCA
+AATGGTACACCAGAGTCTATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGGGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGTTACATCCAGTGCATGCAGGACCTATC
+CCACCAGGTCAGATGAGAGAACCAAGGGGAAGTGATATAGCAGGAACCAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------AGCAACC
+CACCTGTC---CCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCCACGCAGGAAGTAAAAGGCTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATTTTAAA
+AGCATTGGGGCCAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GTGTGGGAGGACCTAGCCATAAAGCAAGAATTTTGGCTGAGGCAATG---
+---------AGCAAAGCAACA------------------------GGTGC
+AGCCATAATGATGCAGAAG---AGCAAC---TTTAAGGGC---CAAAGA-
+--AGAATTGTTAAGTGTTTTAACTGTGGCAAAGAAGGACATATAGCTAGA
+AATTGCAGGGCCCCTAGAAAAAGGGGCTGCTGGAAATGTGGACAGGAAGG
+ACACCAAATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATGTGG---CCT---TCCAACAAG---------GGG---
+AGGCCCGGAAATTTTCTTCAGAAC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCA---GAGAGCTTC
+GGGTTCGGG---------------GAGGAGATA---GCTCCCTCC-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACAAG---------------------GAACAGGTT---
+---CCTCCC---TTG---ATTTCCCTCAAATCACTCTTTGGCAGCGACCA
+GTAGTCACAATAA
+>Ref.F2.CM.97.CM53657.AF377956
+---------AGAGCGTCACTATTAAGCGGGGGAAAATTAGATGATTTGGA
+AAAAATTCGGTTAAGGCCAGGGGGGAAGAAAAAATATAGGCTGAAACATA
+TAGTATGGGCAAGCAGGGAGCTAGAAAGATTTGCACTTAATCCTGGCCTT
+TTAGAGACAAAGGAAGGCTGTAAACAAATAATAGGACAACTACAACCATC
+CCTTCAGACAGGATCAGAAGAGCTTAAATCATTATTCAACACAATAGTAG
+TCCTCTATTATGTACATCAAAGGATAAAAATAGGAGACACCAAGGAAGCT
+TTAGATAAGCTACAGGAAGAACAA------------GAC-----------
+----------AAA---------------AGTCAG------CAAAAA----
+-----------------------------ACACAACCAGCAGCGGCT---
+------------------------------GACAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGACA
+AATGGTACACCAGTCTCTATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAATAGAA---GAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGATTACATCCAGTGCAGGCAGGACCCATC
+CCACCAGGTCAGATAAGAGAACCTAGGGGAAGTGATATAGCAGGAACTAC
+TAGCAACCTACAGGAACAAATAGCATGGATGACA---------AGCAACC
+CACCTGTC---CCAGTAGGAGAAATCTATAAAAGATGGATAATCCTAGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCTTCACAGGAAGTAAAAGGCTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGATCATTTTAAA
+AGGATTAGGAACAGGGGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GGGTGGGGGGACCTGGCCATAAGGCAAGAATTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACA------------------------TCTAC
+ATCCATATTGATGCAGAAA---AGCAAC---TTTAAGGGC---CAAAGA-
+--AGAAATGTTAAGTGTTTCAACTGTGGCAAAGAAGGACATATAGCTAAA
+AATTGCAGGGCCCCTAGAAAAAGGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCTGGAAATTTTCTTCAGAAC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCGCCAGCA---GAAAGCTTC
+GGGTTCGGA---------------GAAGAGATA---ACTCCCTCC-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAAGACAAG---------------------GAAATGTAC---
+---CCTCCC---TTG---ACTTCCCTCAAATCACTCTTTGGCAACGACCC
+TTAGTCACAATAA
+>Ref.G.BE.96.DRCBL.AF084936
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGA
+GAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAGATATAGAATGAAACATT
+TAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGGCCTT
+TTAGAAACAGCAGAAGGTTGTCAAAAAATAATGGCACAGTTGCAACCAGC
+TCTCCAAACAGGAACAGAGGAGATTAAATCACTATTTAATACAGTAGCAA
+CCCTCTATTGTGTACATCAAAAGATAGAGGTAAGAGACACCAAAGAGGCT
+CTAGAGGAAGTGGAAAAGATACAA------------AAG-----------
+----------AAG---------------AGTCAG------CAAAAA----
+--------------------------------------------------
+------------------------------GAA---AAC-----------
+--------------------------------------------------
+-----------------------------AGCAGCAGCCAA---------
+---------GTCAGTCAAAATTACCCTATAGTGCAGAATGCACAAGGGCA
+AATGGTACACCAGGCCATATCACCTAGAACTTTGAATGCATGGGTAAAAG
+TAGTAGAA---GAAAAGGCCTTCAGTCCAGAAGTAATACCCATGTTTACA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAATAC
+AGTGGGGGGGCATCAAGCAGCTATGCAAATGTTAAAGGAGACTATCAATG
+ATGAAGCTGCAGAATGGGACAGGCTACATCCACAGCAGGCAGGGCCTATT
+GCACCAGGCCAGATAAGGGACCCAACGGGAAGTGATATAGCAGGAGCTAC
+TAGTACCCTGCAGGAACAGATAAGATGGATGACC---------AGCAACC
+CACCTGTC---CCAGTGGGAGAAATTTATAAAAGATGGATAATCCTGGGG
+TTAAATAAAATAGTAAGAATGTACAGCCCTGTCAGCATTTTGGACATAAG
+ACAGGGGCCAAAAGAACCCTTTAGAGATTATGTGGATAGATTCTTTAAAA
+CCCTGAGAGCTGAGCAAGCTACACAGGAAGTAAAAAGCTGGATGACAGAC
+ACCTTGTTGATC---CAAAATGCAAACCCAGATTGTAAGATCATCTTAAA
+AGGATTAGGACAAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTAGCTGAGGCAATG---
+---------AGCCAGGCATCAGGTGCA------------------GCAGC
+AGCCATAATGATGCAGAAA---AGCAAT---TTCAAGGGC---CCAAGA-
+--AGAACAATTAAATGTTTCAACTGTGGCAAGGAAGGACATCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAGGG
+ACATCAAATGAAAGAATGCACA---------GAA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCAGGGAATTTCCTTCAGAAC--------------------------
+----AGGCCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCCGCA---GAGAACTTC
+GGGTTCGGG---------------GAGGAGATA---GCCCCCTCC-----
+-------------------CCGAAGCAGGAGCAG----------------
+--------AAGGAAAAG---------------------GAACTATAT---
+------CCT---CTA---TCTTCCCTCAAATCACTCTTTGGCAACGACCA
+ATAGTCAAAGTAA
+>Ref.G.KE.93.HH8793_12_1.AF061641
+ATGGGTGCGAGAGCSTCASTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAAAATTCGSCTGACGCCAGGGGGAAAGAAAAAATACAGACTGAAACATC
+TAGTATGGGCAAGCAGAGAGATGGAGAGATTTGCACTTAACCCTGGCCTT
+TTAGAAACAGCAGAAGGTTGTCAACAAATAATGAGCCAGTTGCAACCAGC
+TATCCAMACAGGAACAGAGGAGATTAAATCATTATTTAATACAGTAGCAA
+CCCTCTATTGTGTACATCCCAAGATAGAGGTAAAGGACACCAAAGAAGCT
+CTAGAGGAAGTAGAAAAGATACAA------------AAG-----------
+----------AAA---------------AGTCAG------CAAAAA----
+-----------------------------ATACAGCAGGCAGCAAGG---
+------------------------------GATGAAGGA-----------
+--------------------------------------------------
+-----------------------------AACAGCAGCCAA---------
+---------GTCAGCCAAAATTATCCTATAGTGCAGAACGCACAAGGACA
+GATGGTACACCAGGCCATATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAAAAGGCCTTCAGTCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGATTTAAATACCATGCTAAACAC
+AGTGGGGGGGCATCAAGCAGCTATGCAAATGCTAAAAGATACTATCAATG
+AGGAAGCTGCAGAGTGGGACAGAATACATCCACCACAGGCAGGGCCTATT
+CCACCAGGCCAAATAAGAGAACCAAGGGGAAGTGATATAGCAGGAACCAC
+TAGTAACCTGCAGGAACAAATAAGATGGATGACC---------AGCAACC
+CACCTATC---CCAGTGGGAGAAATTTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGATTATGTAGACAGGTTCTTTAAAA
+CTTTAAGAGCTGAGCAAGCTACACAGGAAGTAAAAGGCTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAACCCAGATTGTAAGACTATCTTAAG
+AGCATTAGGACCCGGAGCTACACTAGAAGAAATGATGACAGCATGCCAGG
+GAGTGGGAGGACCCGGCCATAAAGCAAGAGTGTTAGCTGAGGCAATG---
+---------AGCCAGGCAACAGGTGCA------------------GCAGC
+AGCCATAATGATGCAGAAA---AGCAAC---TTTAAGGGC---CCGAAA-
+--AGAAATATCAAGTGTTTCAATTGTGGCAAGGAAGGACACTTAGCCAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGCTGGAAATGTGGAAAGGAGGG
+ACATCAAATGAAAGACTGCACG---------GAA---AGACAGGCTAATT
+TTT-AGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCAGGGAATTTTCTTCAGAAC--------------------------
+----AGGCCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCCGCA---GAGAGCTTC
+GGGTTCGGA---------------GAGGAAATA---GCCCCCTCC-----
+-------------------CCGAAGCCAGAGCCG----------------
+--------AAGGAAAAG---------------------GAGATACAT---
+------CCC---TTA---GCTTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTAGTCACAGTAA
+>Ref.G.NG.92.92NG083.U88826
+GTGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATTCTTGGGA
+AAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAGTATAAACTAAAACATA
+TAGTATGGGCAAGCAGGGAACTGGGGAGATTTGCACTTAACCGTGACCTT
+TTAGAAACAGCAGAAGGTTGTGTGCAAATAATGAAACAGTTGCAACCAGC
+TCTCTAGACAGGAACAGAGGAGCTTAGATCATTATTTAATACAGTAGCAA
+CCCTCTACTGTGTACATCAAAAGATAGAGGTAAAAGACACCAAAGAAGCT
+CCAGAGGAAGTGGAAAAAATACAA------------AAG-----------
+----------AAC---------------AGTCAG------CAAGAA----
+-----------------------------ATACAGCAGGCAGCAAAG---
+------------------------------AATGAAGGA-----------
+--------------------------------------------------
+-----------------------------AACAGTAACCCA---------
+---------GTCAGCCAAAATTATCCTATAGTGCAGAATGCACAAGGGCA
+AATGATACATCAGGCCATATCACCTAGGACTTTGAATGCGTGGGTAAAAG
+TAGTAGAA---GAAAAGGCCTTCAGTCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAGGGAGCCACCCCACAAGATTTAAATACCATGCTAAATAC
+AGTGGGGGGGCATCAAGCAGCTATGCAAATGCTAAAGGATACTATCAATG
+ATGAAGCTGCAGAGTGGGACAGGATACATCCACAGCAGGCAGGGCCTATT
+CCACCAGGCCAAATAAGAGAGCCTAGTGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTGCAGGAACAAATAAGATGGATGACC---------AGCAACC
+CACCTATC---CCAGTGGGAGAAATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTGAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGATTATGTAGATAGGTTCTTTAAAA
+CTTTGAGAGCTGAGCAAGCTACACAGGAAGTAAAAGGTTGGATGACAGAC
+ACCTTGTTGGTT---CAAAATGCGAACCCAGATTGTAAAACCATCTTAAG
+AGCATTAGGACCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCAGCCATAAAGCAAGAGTTTTAGCTGAGGCAATG---
+---------AGCCAGGCATCAGGTGCA------------GCA---GCAGC
+AGCCATAATGATGCAGAAA---AGCAAT---TTTAAGGGC---CCGAGA-
+--AGAATTATTAAGTGTTTCAACTGTGGCAAGGAAGGACATCTAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAAGGAGGG
+ACATCAAATGAAAGAATGCACG---------GAA---AGGCAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCAGGAAACTTTCTCCAGAAC--------------------------
+----AGGACA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGATTCGGA---------------GAGGAGATA---GCCCCCTCC-----
+-------------------CCGAAGCAGGAGCCA----------------
+--------AAGGAGAAG---------------------GAGCTATAT---
+------CCC---TTA---ACTTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTAGTCACAGTAA
+>Ref.G.PT.x.PT2695.AY612637
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGAATGAAACATT
+TAGTATGGGCAAGCAGGGAGCTGGAAAAATTTGCACTCAACCCTGACCTT
+TTAGAAACAGCAGAAGGTTGTCAACAAATAATGAGACAGTTACAACCAGC
+TCTCCAGACAGGAACAGAGGAGCTTAGATCATTATTTAATACAGTAGCAA
+CACTCTATTGTGTACATCAAAGGATAGAGGTAAAAGACACCAAAGAAGCT
+CTAGAGGAAGTGGAAAAGACACAG------------AAG-----------
+----------AAA---------------AGTCAG------AAACAA----
+--------------------------------CAGCAGGCAGCAATG---
+------------------------------GACGAAGGA-----------
+--------------------------------------------------
+-----------------------------AACAGCAGCCAA---------
+---------GTCAGCCAAAATTATCCTATAGTGCAGAATGCACAAGGGCA
+AATGGTACATCAGGCCATATCACCCAGAACTTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAAAAGGCCTTCAGTCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCGCAAGATTTAAATACCATGCTAAACAC
+AGTAGGGGGGCATCAAGCAGCTATGCAAATGCTAAAGGATACTATTAATG
+AGGAAGCTGCAGAGTGGGACAGGATACATCCACAACAGGCAGGGCCTATC
+CCACCAGGCCAGATAAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTGCAGGAACAAATAAGATGGATGACC---------AGCAATC
+CACCTATC---CCAGTGGGAGAAATTTATAAAAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGATATAAG
+ACAAGGGCCAAAAGAACCTTTTAGAGATTATGTAGATAGGTTCTTTAAAA
+CTTTAAGAGCTGAACAAGCTACACAGGAAGTAAAAGGCTGGATGACAGAC
+ACCTTGCTGGTC---CAAAATGCGAACCCAGATTGTAAGACCATCTTAAG
+AGCATTAGGACCAGGAGCTTCACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCCAGTCACAAAGCAAGAGTTTTAGCTGAGGCAATG---
+---------AGCCAGGCATCA------------------------GGGGC
+AACAATAATGATGCAAAAA---AGCAAC---TTTAAGGGT---CCAAAA-
+--AGAATGATTAAGTGTTTCAACTGTGGCAAGGAAGGACACCTAGCTAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAGGG
+ACACCAAATGAAAGACTGCACA---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAACAAG---------GGG---
+AGGCCAGGGAATTTTCTCCAGAAC--------------------------
+----AGGCCC------GAGCCAACAGCC----------------------
+-----------------------------CCACCCGCA---GAGAGCTTC
+GGGTTCGGA---------------GAGGAGATA---GCCCCCTCC-----
+-------------------CCGAAGCAAGAGCCG----------------
+--------AAGGACAAG---------------------GAGTTATAC---
+------CCC---TTA---ACCTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTAGTCACAGTAA
+>Ref.H.BE.93.VI991.AF190127
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAATATAGGCTAAAACATC
+TGGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCCGACCTT
+TTAGAAACAGCAGATGGCTGCCAACAAATACTAGGACAGCTACAGCCAGC
+TCTTAAGACAGGAACAGAAGACCTTCAATCATTATATAATACAATAGCAG
+TCCTCTATTGCGTACATCAAAGAATAGATGTGAAAGACACCAAGGAAGCT
+TTAGGGAAGATAGAGGAAATACAG------------AAT-----------
+----------AAG---------------AACAAG------CAAAGA----
+-----------------------------ACACAGCAGGCCCCAGCA---
+------------------------GCAGCTGATAAAGAA-----------
+--------------------------------------------------
+-----------------------------AAGGACAGCAAG---------
+---------ATCAGTCAAAATTATCCTATAGTACAGAATGCCCAGGGGCA
+AATGGTACACCAGGCAATATCACCTAGGACCTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGACTTAAATGCCATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACAATCAATG
+AGGAAGCTGCAGAATGGGATAGGCTACATCCAGTACATGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTTCAGGAACAAGTAGCATGGATGACA---------GGCAATC
+CCCCAATT---CCAGTGGGAGACATCTATAAGAGATGGATAATCCTGGGA
+TTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAA
+ACAAGGACCAAAAGAACCCTTCAGAGACTATGTAGACAGGTTCTTTAGAG
+TTTTAAGAGCTGAGCAAGCTACACAGGATGTAAAAAACTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCGAATCCAGATTGCAGGACTATTTTAAA
+AGCATTAGGACGAGGGGCTTCAATAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCAAGCCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACAAATGCA------------------AGTGC
+AGCCATAATGATGCAGAAA---GGCAAC---TTTAAGGGC---CCAAGA-
+--AGAACTGTTAAATGTTCCAACTGTGGCAAAGAAGGACACATAGCCAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGACAGGAAGG
+ACACCAGATGAAAGACTGCACA---------GGA---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAGCAAG---------GGA---
+AGGCCAGGGAATTTCCCCCAGAAG--------------------------
+----AGGCTA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGA---------------GAGGAGATC---ACCCCCTCT-----
+-------------------CCGAGGCAGGAGCTG----------------
+--------AAAGAACAG---------------------GAACCT------
+------CCT---TTA---ACTTCCCTCAGATCACTCTTTGGCAACGACCA
+ATAGTCACAGTAA
+>Ref.H.BE.93.VI997.AF190128
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACGATTAGATACTTTGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATA
+TAGTATGGGCAAGCAGAGAGCTGGAAAGATTTGCACTTAACCCCGGCCTT
+TTAGAATCAGCAGAAGGCTGTCTACAAATAATAGAACAACTACGGCCATC
+TATTAAGACAGGAACAGAAGAACTTCWATCATTATTTAATACCGTAGCGA
+CCCTCTATTGCGTACTTCAAAGAATAGAGGTAAAAGACACCAAGGAAGCT
+TTAGGGAAGATAGAGGAAATACAA------------AAC-----------
+----------AAA---------------AGGCAG------CAAAAA----
+-----------------------------ACACAGCAAGCAACAGCT---
+------------------------------AATAAGGAA-----------
+--------------------------------------------------
+-----------------------------AGAGACAACAAG---------
+---------GTCAGTCAAAATTATCCTATAGTACAGAATGCTCAAGGGCA
+GATGGTACACCAGCCCATATCACVTAGGACCTTAAATGCATGGGTAAAAG
+TAGTAGAG---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGACTTAAATGCTATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACAATCAATG
+AGGAAGCTGCAGAATGGGATAGGCTACATCCAGTGCATGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGCGATATAGCTGGAACTAC
+TAGTACCCTTCAGGAACAAATAGCATGGATGACA---------GGCAATC
+CAAGTATC---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAGATAGTAAGAATGTATAGTCCTGTTAGTATTCTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTCTTTAAAA
+CTTTAAGAGCTGAGCAAGCCACACAGGAGGTGAAGAATTGGATGACAGAC
+ACCTTGTTGGTC---CAGAATGCAAATCCAGATTGCAAGACTATTTTAAG
+AGCATTAGGACAAGGGGCTTCAATAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTAGTCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACAAATGCA------------------AATGC
+AGCCATAATGATGCAGAAA---AGCAAC---TTTAAGGGC---CCAAGA-
+--AAAATTGTTAAATGTTTCAATTGTGGCAAAGAGGGACACATAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGGGAAGG
+ACATCAGATGAAGGACTGCACA---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATCTGG---CCT---TCCAGCAAA---------GGG---
+AGGCCAGGAAATTTTCTCCAGAGC--------------------------
+----AGGCCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGG---------------GAGGAGATG---ACCTCCTCC-----
+-------------------CCGAAGCAGGAGCTG----------------
+--------AAGGACAAG---------------------GAACCT------
+------CCC---TTT---GCTTCCCTCAAATCACTCTTTGGCAACGACCC
+CTTGTCACAGTAA
+>Ref.H.CF.90.056.AF005496
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATGCTTGGGA
+GAAAATTCGGCTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCCGGCCTT
+TTAGAAACACCAGAAGGCTGTCTACAGATAATAGAACAGATACAGCCAGC
+TATTAAGACAGGAACAGAAGAACTTAAATCATTATTTAATCTAGTAGCAG
+TCCTCTATTGCGTACATCGAAAAATAGATGTGAAAGACACCAAGGAGGCT
+TTAGATAAGATAGAGGAAATACAA------------AAC-----------
+----------AAA---------------AGTCAG------CAAAAA----
+-----------------------------ACACAGCAAGCAGCAGCT---
+------------------------------GATAAGGAA-----------
+--------------------------------------------------
+-----------------------------AAAGACAACAAG---------
+---------GTCAGTCAAAATTATCCTATAGTACAGAATGCTCAAGGGCA
+GATGGTACACCAGGCCATATCACCTAGGACCTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAAAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCACAAGACTTAAATGCTATGCTAAATAC
+AGTGGGGGGACATCAAGCAGCCATGCAGATGTTAAAAGATACAATCAATG
+AGGAAGCTGCAGAATGGGACAGGGTACATCCAGTGCATGCAGGGCCTATT
+CCACCAGGCCAAATGAGAGAACCAAGGGGAAGCGATATAGCAGGAACTAC
+TAGTACCCTGCAGGAACAAATAGCATGGATGACA---------GGCAATC
+CAGCTATC---CCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGA
+TTAAATAAGATAGTAAGAATGTATAGTCCTGTCAGCATTCTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGACAGGTTTTTTAAAA
+CTTTAAGAGCTGAGCAAGCCACACAGGATGTGAAGAATTGGATGACAGAA
+ACCTTGTTGGTC---CAAAATGCAAATCCAGATTGCAAGACTATATTAAG
+AGCATTAGGACAAGGGGCTTCAATAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTAGTCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGCCAAGTAACAAATACA------------------AATAC
+AGCCATAATGATGCAGAAA---GGCAAC---TTTAAGGGC---CAAAGA-
+--AAATTTGTTAAATGCTTCAACTGTGGCAAAGAGGGACACATAGCCAGA
+AATTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGAAGAGAAGG
+ACATCAGATGAAAGACTGCACA---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATTTGG---CCT---TCCAGCAAA---------GGG---
+AGGCCAGGAAATTTTCTCCAGAGC--------------------------
+----AGGCCA------GAACCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGA---------------GAGGAGATG---ACCCCCTCT-----
+-------------------CCGAAGCAGGAGCAG----------------
+-----CTGAAGGACAAG---------------------GAACCT------
+------CCC---TTA---GCTTCCCTCAGATCACTCTTTGGCAGCGACCC
+CTTGTTACAGTAA
+>Ref.H.GB.00.00GBAC4001.FJ711703
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGA
+GAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTCAACCCCGACCTT
+TTAGAAACAGCAGATGGCTGTCTAAAAATAYTAGGACAGATACAGCCAGC
+TCTTCAGACAGGAACAGAAGAAATTAAATCGTTATTTAATCTAGTAGCAG
+TCCTCTATTGTGTACATCAGAAAATAGAGGTACAAGATACCAGTGAAGCT
+TTAAATAAGGTAAAGGAGATACAG------------AAC-----------
+----------AAG---------------AACCAG------CAAACA----
+-----------------------------ACACAGCAGGCAACAGCT---
+------------------------------RGTAAAGAG-----------
+--------------------------------------------------
+-----------------------------AAGGACAGCAAG---------
+---------ATCAGTCAAAATTATCCTATAGTACAGAATGCCCAAGGGCA
+AATGGTACACCAGGCCATATCACCTAGGACCTTAAATGCATGGGTAAAAG
+TAGTAGAA---GAGAAGGCTTTTAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGARGGAGCMACCCCACAAGACYTAAATACCATGTTAAACTC
+AGTGGGGGGACATCAGGCAGCCATGCAAATRTTAAAAGATACAATCAATG
+AGGAAGCTGCAGAATGGGATAGGACACATCCAGTGCATGCAGGGCCTATT
+CCACCAGGCCAGATGAGAGAACCAAGGGGAAGCGATATAGCAGGAACTAC
+TAGYAACCTTCAGGAACAAATAGCATGGATGACA---------RGYAATC
+CCCCTRTC---CCAGTGGGRGAKATCTATAAAAGATGGATAATCWTGGGA
+TTAAATAAAATAGTAAGAATGTATAGTCCTGTCAGCATTTTGGACATAAA
+ACAAGGGCCAAAAGAACCCTTTAGAGAYTATGTAGACAGGTTYTTTAAAA
+CTTTAAGAGCTGAGCAAGCCACACAGGACGTGAAGAATTGGATGACAGAC
+ACCTTGTTGGTC---CAAAATGCAAATCCAGATTGCAAGACTATTTTAAA
+AGCATTAGGACAAGGGGCTTCAATAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGCCATAAAGCAAGAGTTTTGGCTGAGGCAATG---
+---------AGTCAAATGACAAATATG------------------AATAC
+AGCCATAATGGTGCAGAAA---GGCAAC---TTTAAGGGC---CAAAGA-
+--AGAACTGTTAAATGTTTCAACTGTGGTAAAGAAGGACACATAGCAAGA
+AACTGCAGGGCCCCTAGGAAAAAGGGCTGTTGGAAATGTGGGAGGGAAGG
+ACATCAGATGAAAGACTGCACA---------GAG---AGACAGGCGAATT
+TTTTAGGGAAAATCTGG---CCT---TCCAGCAAA---------GGG---
+AGGCCAGGGAACTTTCTCCAGAGC--------------------------
+----AGGCCA------GAACCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAATGCTTC
+GGGTTCGGG---------------GAGGAGATG---ACACCCCCT-----
+-------------------CCGAAGCAGGAGATG----------------
+--------AAGGAAAAG---------------------GAACCT------
+------CCC---TTA---ACTTCCCTCAGATCACTCTTTGGCAGCGACCC
+CTTGTCACAGTAA
+>Ref.J.CD.97.J_97DC_KTB147.EF614151
+------------------------------------------GCATGGGA
+GAAAATTTGGCTGAGGCGACGGGGAGAGAAAAAATACAGGCGAAAACATA
+TAGTATGGGCAAGCAGGGAGCTGGACAGATATGCACTTAACCCTGGCCTT
+CTATAGTGAGCAGAAGGCGGTGAACAGATACTAGTACAGATCCAACCAGA
+TCTTTAAACAGGAGCAGAGGAGATAAAATCATTATTTAACACAGAAGCAA
+CCCTCTATTGTGTACATTAGAGGATAGACATAAGAGACACCAAGGAGGCT
+TTAGACAAGATAGAGGAACTTTAA------------AAC-----------
+----------AAA---------------AGCAAG------GAGAAA----
+-----------------------------GCTAAGAAAGAA---GCT---
+------------------------------GTCAAAAAA-----------
+--------------------------------------------------
+-----------------------------TACAACAGTCAG---------
+---------GTGAGTCACAATTATCCTATATTGCAAAATATGTAAGGGGA
+ACTAGTACACCAGGCCCTATCACCTAGAACATTAAATGCATGGGTAAAGG
+TGATAGAA---GAGAAAGGTTTCAACCCAGAAGTGATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACTCCACAAGATTTAAATACCATGCTAAACAC
+GGTGGGGGGACATCAAGCAGCGATGCAAATGTTAAAAGATACCATCAATG
+AGGAAGCTGCAGAATGGGACAGGCTCCATCCAGTACATGCAGGACCTGTT
+GCACCAGGTCAGATGAGAGAACCGAGGGGAAGTGATATAGCAGGAACTAC
+TAGTAACCTTCAGGAACAAATAGCATGGATAACAGGCAACGGTGGCAACC
+AACCTATC---CCAGTAGGAGAAATCTATAAAAGATGGATAATTTTAGGA
+TTAAATAAAATAGTGAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+GCAAGGACCAAAAGAACCTTTTAGAGACTATGTGGATAGGTTCTTTAAAA
+CTCTAAGAGCCGAGCAAGCTACACAGGAGGTAAAAAATTGGATGACAGAT
+ACCTTGTTGGTC---CAAAATGCGAATCCAGATTGTAAGACCATTCTAAG
+AGCATTAGGAGCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGAGAGTATCCAGTCATAGAGCAAGAGTTTTTGGTGAGGCAATA---
+---------AGCCAAGTAAAC------------------------AATAC
+AAACATAATAATGCAAAGA---GGTAAC---TTTAGGGGC---CAGAAG-
+--AGAAGTGTTAAATGTTTCAACTGTGGTAAAGAGGGACACATAGCAAAA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGAAAGTGTGGAAAGGAAGG
+ACACCAAATAAAAAACTGCACT---------GAG---AGACAGGCCAATT
+TTTTAGGGAAAGTTTGG---CCT---TCCAGCAAG---------GGG---
+AGGCCAGGGAACTTTCTCCAGAGC--------------------------
+----AGGCCA------GAACCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAAAGCTTC
+GGGTTCGGG---------------GAGAAGATA---ACTCCCTCC-----
+-------------------CAGAAACAGGAACCG----------------
+--------AGGAAG------------------------GAACTATAT---
+------CCT---TCA---GCTTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTCGTCACAGTAA
+>Ref.J.CM.04.04CMU11421.GU237072
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATACTTGGGA
+GAAAATTCGGTTGAGGCCAGGGGGGAAGAAACGTTATAGGCTAAAACATT
+TAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGRCCTT
+CTAGAAACATCAAAAGGCTGTCAACAAATATTAGTACAGCTCCAACCATC
+TTATCAAACAGGGACAGAAGAAATTAAGTCATTATATAACACAGTAGCAA
+CCCTCTATTGCGTACATGAGGRSATAGAGGTAAAAGACACCAAGGAARCT
+TTAGACAAGATAGAGGAACTACAA------------AAG-----------
+----------AAG---------------AACAAG------CAACAG----
+-----------------------------GCACAGAAAGCAGAARCT---
+------------------------------GACAAAAGR-----------
+--------------------------------------------------
+-----------------------------RACARCAGTCAA---------
+---------GTCAGTCAAAATTATCCTATAGTGCAGAACATGCAAGGGCA
+ACCAGTACAYCARGCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TGRTAGAA---GARAAGGCTTTYAGCCCAGAAGTAATACCCATGTTTTCA
+GCYTTATCAGAAGGAGCCACCCCRCAAGATTTAAATACCATGCTAAATAC
+AGTRGGGGGACACCARGCAGCTATGCAAATGTTAAAAGATACTATCAATG
+AGGAAGCTGCAGAATGGGATAGGKTACATCCAGTACATGCAGGGCCTCCR
+GCACCAGGCCAGGCGAGAGAACCGAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTCCAGGAACAAATAGCATGGATGACA---------GGCAACC
+CACCTATC---CCAGTAGGGGAAATTTATAAAAGRTGGATAATTCTGGGA
+TTAAATAAAATAGTRAGAATGTATAGCCCTGTCAGCATTTTGGATATAAG
+ACAAGGACCAAAAGARCCTTTTAGAGAYTATGTAGATCGGTTCTTTAAAA
+CTCTAAGAGCTGAGCAAGCTACACAGGAAGTAAAAAATTGGATGACAGAT
+ACCTTGTTGATC---CAAAATGCAAATCCAGATTGCAGAACCATYTTAAA
+AGCATTAGGACCAGGAGCTACACTAGARGAAATGATGACAGCATGYCAGG
+GAGTGGGAGGACCTGGTCATAAAGCAAGAGTTTTGGCWGAAGCAATG---
+---------AGCCAAATGACCAAT---------------------ACAGC
+AAACATAATGATGCAAARG---GGTAAY---TTTAAGGGC---CARAAA-
+--AGAATGATTAAGTGTTTCAATTGTGGWAAACCAGGACACCTAGCMAGA
+AAYTGCAGAGCCCCTAGRAARAAGGGCTGTTGGAAATGTGGACAGGAAGG
+ACAYCAAATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAARATCTGG---CCT---TCCAACAAR---------GGG---
+AGGCCAGGGAACTTTCTYCAGAGC--------------------------
+----AGRCCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGG---------------GAAGAGACC------YCYYCC-----
+-------------------CCGAAACAAGAAGCG----------------
+--------AAG---------------------------GAACTGTAT---
+------CCT---CTA---ACTTCCCTCAAATCACTCTTTGGCARCGACCC
+CTYGTCACAATAA
+>Ref.J.SE.93.SE9280_7887.AF082394
+ATGGGTGCGAGAGCGTCAATATTAAGTGGGGGAAAATTAGATGATTGGGA
+AAAAATTCGGTTGAGGCCAGGGGGGAAGAAAAAATATAGGATAAAGCATC
+TAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGGCCTT
+CTAGAGTCAGCAAAAGGCTGTCAACAAATACTAGTACAGCTCCAACCAGC
+TCTCCAGACAGGAACACAAGAAATTAAATCATTGTATAATACAGTAGCAA
+CCCTCTATTGCGTACATCAGAGGATAGAAATAAAAGACACCATGGAAGCT
+TTAGAGAAGATAGAGGAAATTCAA------------AAC-----------
+----------AAG---------------AACAAA------CAGCAG----
+-----------------------------GCACAGAAAGCAGAAACT---
+------------------------------GACAAAAAA-----------
+--------------------------------------------------
+-----------------------------GACAACAGTCAG---------
+---------GTCAGTCAAAATTATCCTATAGTGCAGAATCTGCAAGGGCA
+ACCGGTACACCAGGCCCTATCACCTAGAACTTTAAATGCATGGGTAAAAG
+TGATAGAA---GAAAAAGCTTTCAGCCCAGAAGTGATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACCCCGCAAGATTTAAATACCATGCTAAACAC
+AATAGGGGGACACCAAGCAGCTATGCAAATGTTAAAAGATACTATCAATG
+AGGAAGCTGCAGAATGGGACAGGGTACATCCAGTACATGCAGGGCCTATT
+GCACCAGGCCAGGTGAGAGAACCAAGGGGAAGTGATATAGCAGGAACTAC
+TAGTACCCTCCAGGAACAAATAGGATGGATGACA---------GGCAATC
+CACCTATC---CCAGTAGGAGAGATTTATAAAAGATGGATAATTCTGGGA
+CTAAATAAAATAGTAAGAATGTATAGCCCTGTCAGTATTTTGGATATAAG
+ACAAGGACCAAAAGAACCTTTTAGAGACTATGTAGACAGGTTCTTTAAAG
+CTCTAAGAGCTGAGCAAGCTACACAGGATGTAAAAAATTGGATGACAGAT
+ACCTTGCTGGTC---CAAAATGCAAATCCAGATTGCAAGACCATTTTAAA
+AGCATTAGGATCAGGAGCTACACTAGAAGAAATGATGACAGCATGTCAGG
+GAGTGGGAGGACCTGGTCATAAGGCGAGAGTTTTGGCTGAAGCAATG---
+---------AGCCAAGTGACC------------------------AATAC
+CAACATAATGATGCAAAGA---GGTAAC---TTTAGGGAC---CATAAA-
+--AGAATTGTTAAGTGTTTCAATTGTGGCAAACAAGGACACATAGCAAAA
+AACTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGAAAGGAAGG
+ACACCAAATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAGATTTGG---CCT---TCCAGCAAA---------GGG---
+AGGCCAGGGAACTTTCTCCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCCTC
+GGGCTCGGA---------------GAGGAGATC------CCCTCC-----
+-------------------CCGAAACAGGAGCCG----------------
+--------AAGGACAAG---------------------GAACTGTAT---
+------CCT---CTA---ACTTCCCTCAAATCACTCTTTGGCAGCGACCC
+CTTGTCACAATAA
+>Ref.K.CD.97.97ZR_EQTB11.AJ249235
+ATGGGTGCGAGAGCTTCAGTATTAAGCGGGGGAAAATTAGACAAATGGGA
+AAAAATTCAGTTACGGCCAGGGGGAAAGAAAAAATACAGGCTAAAACATC
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTAACCCTAACCTT
+TTAGAGACAGTAGAAGGCTGTCGGCAAATAATAAGACAACTACAACCATC
+CCTTCAAACAGGCTCGGAAGAGCTTAGATCACTATTTAATACAGTAGCAA
+CCCTCTATTGGGTGCATCAAAGTATACAGGTAAGGGACACCAAGGAAGCC
+TTAGACAAACTAGAGGAAGAACAA------------AAC-----------
+----------AGA---------------ACTCAG------CAAAAG----
+-----------------------------ACACAGCAAGGAAAAGCT---
+------------------------------GACAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTTAGTCAAAATTACCCTATAGTACAGAATCTTCAGGGGCA
+AATGGTACACCAGGCCCTATCACCTAGAACTTTAAATGCATGGGTTAAAG
+TAATAGAA---GAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTTCA
+GCATTATCAGAAGGAGCCACTCCACAAGATTTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACTATCAATG
+AGGAAGCTGCAGAATGGGACAGGATGCACCCAGTGCAAGCAGGGCCTATC
+CCACCAGGCCAAATAAGAGAACCTAGGGGGAGTGATATAGCAGGAACTAC
+TAGCACTCTTCAGGAACAAATAACATGGATGACA---------AGCAACC
+CACCTATC---CCAGTGGGAGAAATCTATAAAAGATGGATAATCCTGGGG
+TTAAATAAAATAGTGAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGATAGGTTCTTTAGAG
+TTCTAAGAGCTGAACAAGCCACACAGGAAGTAAAAAATTGGATGACAGAA
+ACCCTGTTGGTC---CAAAACGCAAACCCAGATTGTAGGACCATTTTAAA
+GGCGTTGGGATCAGGGGCTACATTAGAAGAAATGATGACAGCATGTCAGG
+GAGTAGGAGGGCCTGGCCATAAAGCAAGGGTTTTGGCTGAGGCAATG---
+---------AGCCAGGTAACA------------------------AATTC
+AGCCGTAATGATGCAGAGA---GGCAAC---TTTAAGGGT---CAAAGA-
+--AGAATTATTAAGTGCTTCAACTGTGGCAAAGAAGGACACCTAGCCAGA
+AATTGTAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGGAAAGAAGG
+ACATCAGATGAAAGACTGTTCT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAGTTCTGG---CCT---CTCAACAAA---------GAG---
+AGGCCAGGAAATTTTCTTCAGAAC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTT
+GGGTTCGGG---------------GAGAAGATA---ACCCCCTCT-----
+-------------------CTGAGACAGGAAATG----------------
+--------AAAGATCAG---------------------GAACAGGGT---
+---CCTCCT---TTA---ACTTCCCTCAAATCACTCTTTGGCAGCGACCC
+GTTGTCACAGTAA
+>Ref.K.CM.96.96CM_MP535.AJ249239
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGA
+AAAGATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAAACTGAAACATC
+TAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTCAACCCTGGCCTT
+TTAGAGACAACAGAAGGCTGTCGGCAAATAATAACACAAATACAGCCATC
+CATTCAAACAGGATCAGAAGAGATTAAATCACTATATAATACAATAGCAG
+TCCTCTATTTTGTACATCAAAAGATAGAGGTAAAAGACACCAAGGAAGCC
+TTAGACAAACTAGAGGAAGAACAA------------AAC-----------
+----------AAA---------------AGTCAG------CGAAAG----
+-----------------------------ACACAACAAGAAGCAGCT---
+------------------------------GACAAAGGG-----------
+--------------------------------------------------
+--------------------------------------------------
+---------GTCAGTCAAAATTACCCTATAGTACAGAATCTGCAGGGGCA
+AATGGTACACCAGGCCCTATCACCTAGAACTTTAAATGCATGGGTGAAGG
+TAATAGAG---GAGAAGGCTTTCAGCCCAGAAGTAATACCCATGTTTACA
+GCATTATCAGAAGGAGCCACTCCACAAGATCTAAACACCATGCTAAACAC
+AGTGGGGGGACATCAAGCAGCCATGCAAATGTTAAAAGATACTATCAATG
+ATGAAGCTGCAGAATGGGACAGGTTACACCCAGTGCATGCAGGGCCTATC
+CCACCAGGCCAAATGAGAGAACCGAGGGGGAGTGACATAGCAGGAACTAC
+CAGCACCCTTCAGGAACAAATAGCATGGATGACA---------AGCAACC
+CACCTGTC---CCAGTGGGGGAAATCTATAAAAGATGGATAATCCTGGGT
+TTAAACAAAATAGTAAGAATGTATAGCCCTGTCAGCATTTTGGACATAAG
+ACAAGGGCCAAAAGAACCCTTTAGAGACTATGTAGATAGGTTCTTTAAAA
+CCCTAAGAGCTGAACAAGCCACACAGGAAGTAAAGAATTGGATGACAGAC
+ACCCTGTTGGTC---CAAAACGCAAACCCAGATTGTAAGACCATTTTAAA
+AGCGTTGGGACCAGGGGCTTCATTAGAAGAGATGATGACAGCATGTCAGG
+GAGTGGGAGGGCCTAGCCATAAAGCAAGAATTTTGGCTGAGGCAATG---
+---------AGCCAGGTAACA------------------------AATCC
+AGTTGTAATGATGCAGAAA---GGCAAC---TTTAAGGGC---CATAGA-
+--AAAATTGTTAAGTGCTTCAACTGTGGCAAAGAAGGGCACATAGCCAGA
+AATTGCAGGGCCCCTAGAAAAAAGGGCTGTTGGAAATGTGGGAAGGAAGG
+ACATCAGATGAAAGACTGCACT---------GAG---AGACAGGCTAATT
+TTTTAGGGAAAATCTGG---CCT---TCCCACAAG---------GGG---
+AGGCCAGGGAATTTTCTTCAGAGC--------------------------
+----AGACCA------GAGCCAACAGCC----------------------
+-----------------------------CCACCAGCA---GAGAGCTTC
+GGGTTCGGG---------------GAGGAGATA---ACCCCCTCT-----
+-------------------CCGAGGCAGGAGACC----------------
+--------AAAGACAAG---------------------GAACAGAGC---
+---CCTCCT---TTA---ACTTCCCTCAAATCACTCTTTGGCAACGACCC
+ATTGTCACAATAA
diff --git a/Examples/Data/HIV1_REF_2010_gag_macse_AA.fasta b/Examples/Data/HIV1_REF_2010_gag_macse_AA.fasta
new file mode 100644
index 0000000..463ef1b
--- /dev/null
+++ b/Examples/Data/HIV1_REF_2010_gag_macse_AA.fasta
@@ -0,0 +1,78 @@
+>Ref.D.CD.83.ELI.K03454
+MGARASVLSGGKLDKWEKIRLRPGGKKKYRLKHIVWASRELERYALNPGLLET-SEGCKQIIGQLQP-AIQTGTEELRSLYNTVATLYCVH-KGIDVKDTKEALEKMEEE-QNKSK---KKAQQA--AADTGNNS------QVSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTSN---PPIPVGEIYKRWIIVGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPQATLEEMMTACQGVGGPSHKARVLAEAMS--QA--T---NSVTT---AMMQRGNFKGPRKIIKCFNCGKEGHIAKNCRAPRKKGCWR-CGKEGHQLKDCTERQANFLGRIWPSHKGRPGNFLQSRP-------- [...]
+>Ref.C.ET.86.ETH2220.U46016
+MGARASILRGEKLDAWEKIKLRPGGKKHYMLKHLVWANRELEKFALNPDLLDT-SAGCKQIIKQLQP-ALQTGTEELKSLFNTVATLYCVH-QKIEIKDTKEALDKIEEE-QNESQ---QKTQQA-GAADRG---------KDSQNYPIVQNMQ-GQMVHQPISARTLNAWVKVVEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQMRDPRGSDIAGTTSTLQEQIAWMTGN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQDVKNWMTDTLLVQNANPDCKTILRALGPGASLEEMMTACQGVGGPAHKARVLAEAMS--QV--N---N--TT---IMMQKSNFKGPKRAIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGRLWPSNKGRPGNFLQSRP-------- [...]
+>Ref.C.ZA.04.04ZASK146.AY772699
+MGARASVLRGEKLDTWEKIRLRPGGKKHYMLKHIVWASRELERFALNPGLLET-SEGCKQILAQIQP-AIQTGTEELKSLFNTIAVLYCVH-KKIDVRDTKEALDKIEEE-QNKSQ---QKTQQA-KAADE----------KVSQNFPIVQNLQ-GQMVHQPLSPRTLNAWVKVIEEKGFNPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQMREPRGSDIAGTTSNLQEQVAWMTSN---PPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCKTILRALGPGATLEEMMAACQGVGGPGHKARVLAEAMS--QI--N---N--GN---IMMQRSNFKGPKRIVKCFNCGKGRHIAKNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSQKGRPGNFLQNRL-------- [...]
+>Ref.A1.UG.92.92UG037.AB253429
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFALNPSLLET-TEGCQQIMEQLQS-ALRTGTEELRSLYNTVATLYCVH-QRIEVKDTKEALDKIEEI-QKKSK---QKTQQA--AADTGSSS------KVSQNYPIVQNAQ-GQMIHQSLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNMMLNIVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQMREPRGSDIAGTTSTPQEQIAWMTGN---PPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTETLLIQNANPDCKSILRALGAGATLEEMMTACQGVGGPGHKARVLAEAMS--QV--Q---H--TN---IMMQRGNFKGQKR-IKCFNCGKEGHLAKNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSSKGRPGNFPQSRP-------- [...]
+>Ref.A1.RW.92.92RW008.AB253421
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRMKHLVWASRELERFALNPGLLET-TEGCQKIIEQLQP-SVKTGTEELKSLFNTVATLYCVH-QRIDVKDTKEALDKIEEM-QNKSK---QKTQQA--AADIGNSS------KVSQNYPIVQNAQ-GQMIYQSMSPRTLNAWVKVIEEKGFNPEVIPMFSALSEGATPQDLNMMLNIVGGHQAAMQMLKDTINEEAADWDRLHPVQAGPIPPGQMREPRGSDIAGTTSTPQEQIGWMTSN---PPIPVGDIYKRWIILGLNKIVRMYSPVSILDVKQGPKEPFRDYVDRFFKILRAEQATQDVKHWMTETLLIQNANPDCKSILRALGTGATLEEMMTACQGVGGPSHKARVLAEAMS--QV--Q---H--PN---IMMQRGNFRGQKR-IKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSSKGRPGNFPQSRP-------- [...]
+>Ref.H.BE.93.VI997.AF190128
+MGARASVLSGGRLDTLEKIRLRPGGKKKYRLKHIVWASRELERFALNPGLLES-AEGCLQIIEQLRP-SIKTGTEEL?SLFNTVATLYCVL-QRIEVKDTKEALGKIEEI-QNKRQ---QKTQQA--TANKERDN------KVSQNYPIVQNAQ-GQMVHQPIS?RTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNAMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIAWMTGN---PSIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCKTILRALGQGASIEEMMTACQGVGGPSHKARVLAEAMS--QV--T---N--ANA-AIMMQKSNFKGPRKIVKCFNCGKEGHIARNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRP-------- [...]
+>Ref.H.BE.93.VI991.AF190127
+MGARASVLSGGKLDAWEKIRLRPGGRKKYRLKHLVWASRELERFALNPDLLET-ADGCQQILGQLQP-ALKTGTEDLQSLYNTIAVLYCVH-QRIDVKDTKEALGKIEEI-QNKNK---QRTQQAPAAADKEKDS------KISQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNAMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQVAWMTGN---PPIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFRVLRAEQATQDVKNWMTDTLLVQNANPDCRTILKALGRGASIEEMMTACQGVGGPSHKARVLAEAMS--QV--T---N--ASA-AIMMQKGNFKGPRRTVKCSNCGKEGHIARNCRAPRKKGCWK-CGQEGHQMKDCTGRQANFLGKIWPSSKGRPGNFPQKRL-------- [...]
+>Ref.G.PT.x.PT2695.AY612637
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRMKHLVWASRELEKFALNPDLLET-AEGCQQIMRQLQP-ALQTGTEELRSLFNTVATLYCVH-QRIEVKDTKEALEEVEKT-QKKS----QKQQQA--AMDEGNSS------QVSQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRIHPQQAGPIPPGQIREPRGSDIAGTTSTLQEQIRWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTDTLLVQNANPDCKTILRALGPGASLEEMMTACQGVGGPSHKARVLAEAMS--QA--S---G--AT---IMMQKSNFKGPKRMIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFLQNRP-------- [...]
+>Ref.K.CM.96.96CM_MP535.AJ249239
+MGARASVLSGGKLDAWEKIRLRPGGKKKYKLKHLVWASRELERFALNPGLLET-TEGCRQIITQIQP-SIQTGSEEIKSLYNTIAVLYFVH-QKIEVKDTKEALDKLEEE-QNKSQ---RKTQQE--AADKG----------VSQNYPIVQNLQ-GQMVHQALSPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINDEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIAWMTSN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCKTILKALGPGASLEEMMTACQGVGGPSHKARILAEAMS--QV--T---N--PV---VMMQKGNFKGHRKIVKCFNCGKEGHIARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRP-------- [...]
+>Ref.F2.CM.97.CM53657.AF377956
+---RASLLSGGKLDDLEKIRLRPGGKKKYRLKHIVWASRELERFALNPGLLETK-EGCKQIIGQLQP-SLQTGSEELKSLFNTIVVLYYVH-QRIKIGDTKEALDKLQEE-QDKSQ---QKTQPA--AADKG----------VSQNYPIVQNLQ-GQMVHQSLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVQAGPIPPGQIREPRGSDIAGTTSNLQEQIAWMTSN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQASQEVKGWMTDTLLVQNANPDCKIILKGLGTGATLEEMMTACQGVGGPGHKARILAEAMS--QV--T---S--TS---ILMQKSNFKGQRRNVKCFNCGKEGHIAKNCRAPRKRGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFLQNRP-------- [...]
+>Ref.F1.FI.93.FIN9363.AF075703
+MGARASVLSGGKLDAWEKIRLRPGGKKQYRIKHLVWASRELERFAIDPGLLET-SEGCQKIIAQIQP-SIQTGSEELRSLYNTIAVLYFVH-QKIEVKDTKEALDKLEEE-QNKSQ---QKTQQAAAAADKG----------VSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIQWMTSN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKALRAEQATQEVKGWMTDTLLVQNANPDCKIILKGLGIGATLEEMMTACRGVGGPGHKARILAEAMS--QA------N--TT---IMMQKSNFRGQRRIVKCFNCGKEGHIARNCRAPRKKGCWK-CGQEGHQMKDCTERQANFLGKIWPSNKGRPGNFLQSRP-------- [...]
+>Ref.D.UG.94.94UG114.U88824
+MGARASVLSGGKLDEWEKIRLRPGGKKKYRLKHLVWASRELERFALNPGLLET-SEGCRQIIRQLQP-SIQTGSEEIKSLYNTVVTLYCVH-ERIKVASTKEALDKIEEE-QAKSK---KKAQQA--TADTRNSS------QVSQNYPIVQNLQ-GQMVHHPLSPRTLNAWVKVIEEKAFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQLREPRGSDIAGTTSNLQEQIGWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMS--QA--T---N--ANT-AIMMQRGNFKGPKKIIKCFNCGKEGHTAKNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSHNGRPGNFLQSRP----PA-- [...]
+>Ref.F1.BR.93.93BR020_1.AF005494
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFALDPGLLET-SEGCRKIIGQLQP-SLQTGSEELKSLYNTIAVLYYVH-QKVEVKDTKEALEKLEEE-QNKGR---QKTQQA--TAEKG----------VSQNYPIVQNLQ-GQMVHQSLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPTQAGPIPPGQIREPRGSDIAGTTSTLQEQIQWMTGN---PPVPVGEMYKRWIILGLNKIVRMYSPVGILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTDTLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARVLAEAMS--QA--T---N--TA---IMMQKSNFKGQRRIVKCFNCGKEGHIAKNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSNKGRPGNFIQNRP-------- [...]
+>Ref.D.CM.01.01CM_4412HAL.AY371157
+--ARASILSGGKLDAWEKIRLRPGGSKKYRLKHLIWASNELERFALNPGLLET-SDGCKQILGQLQP-ALKTGTEELRSLFNAVAVLYCVH-ERIEVKDTKEALDKIEEE-QNKSK---KKAQQA--AADTGDNK------QVSQNYPIVQNLQ-GQMVHQALSPRTLNAWVKVIEEKAFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVQAGPVAPGQMREPRGSDIAGTTSTLQEQIGWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQATQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMS--QA--TAGMN--AA---IMMQRGNFKGPKRIVKCFNCGKEGHIAKNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSNKGRPGNFLQSRP-------- [...]
+>Ref.F1.FR.96.96FR_MP411.AJ249238
+MGARASVLSGGKLDAWERIRLRPGGKKKYRMKHLVWASRELERFAVDPGLLET-PEGCKQIIRQLQP-SLQTGSEELRSLFNTVAVLYCVH-QKIEIKDTKEALEKLEEE-QNKGQ---QKTQQA--AADKG----------VSQNYPIVQNLQ-GQMVHQPISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPAHAGPILPGQMREPRGSDIAGTTSTLQEQIQWMTSN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQASQEVKNWMTESLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMS--QA--T---N--AA---IMMQKSNYKGPRRFIKCFNCGKEGHIAKNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFLQNRP-------- [...]
+>Ref.A2.CY.94.94CY017_41.AF286237
+MGARASILSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELEKFSINPGLLET-PEGCRQIIRQLQP-ALQTGTEELKSLYNTVVVLYWVH-QRVDVKDTKEALDKIEEE-QNK-----QKTQHA--AADTGNSS--------SQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRVHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIGWMTSD---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCRSILRALGPGASLEEMMTACQGVGGPSHKARVLAEAMSHVQS--T---N--TN---IMMQRGNFRGQKR-IKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFPQSRT-------- [...]
+>Ref.H.CF.90.056.AF005496
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELERFALNPGLLET-PEGCLQIIEQIQP-AIKTGTEELKSLFNLVAVLYCVH-RKIDVKDTKEALDKIEEI-QNKSQ---QKTQQA--AADKEKDN------KVSQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNAMLNTVGGHQAAMQMLKDTINEEAAEWDRVHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIAWMTGN---PAIPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQDVKNWMTETLLVQNANPDCKTILRALGQGASIEEMMTACQGVGGPSHKARVLAEAMS--QV--T---N--TNT-AIMMQKGNFKGQRKFVKCFNCGKEGHIARNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRP-------- [...]
+>Ref.A2.CM.01.01CM_1445MV.GU201516
+--ARASVLSGGKLDAWEKIRLRPGGRKKYRMKHLVWASRELEKYSINPGLLET-SEGCKQIIRQLHS-ALPVGTEELKSLYNTIAVLYYVH-QKIEVKDTKEALDKLEEE-QNKYK---QKTQQA--AAATGNSS--------SQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRVHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIGWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQDVKNWMTDTLLVQNANPDCKTILRALGPAATLEEMMTACQGVGGPGHKARVLAEAMS--QIHST---N--QN---VMMQRGNFRGPKR-IKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPPNKGRPGNFPQSRT-------- [...]
+>Ref.A1.AU.03.PS1044_Day0.DQ676872
+MGARASILSGGRLDAWEKIRLRPGGKKKYRLKHLVWASRELERFAL?P?LLES-AEGCQQIMEQLQP-A?KTG?EEIKSLFNTVATLYCVH-QRIDVKDTKEA?DKIEEI-KNKSK---QRTQQA--AADTGNSG------KVSQNYPIVQNAQ-GQMI?QNLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNVMLNIVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQIREPRGSDIAGATSTPQEQLQWMTGN---PPIPVGDIYKRWIILGLNKIVRMYSPTSILDIRQGPKESFRDYVDRFFKALRAEQATQEVKSWMTETLLVQNANPDCKSILKALGSGATLEEMMTACQGVGGPSHKARVLAEAMS--QA--Q---Q--TS---IMMQRGNFRGGQKRIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSSKGRPGNFPQSRP-------- [...]
+>Ref.J.SE.93.SE9280_7887.AF082394
+MGARASILSGGKLDDWEKIRLRPGGKKKYRIKHLVWASRELDRFALNPGLLES-AKGCQQILVQLQP-ALQTGTQEIKSLYNTVATLYCVH-QRIEIKDTMEALEKIEEI-QNKNK---QQAQKA--ETDKKDNS------QVSQNYPIVQNLQ-GQPVHQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTIGGHQAAMQMLKDTINEEAAEWDRVHPVHAGPIAPGQVREPRGSDIAGTTSTLQEQIGWMTGN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKALRAEQATQDVKNWMTDTLLVQNANPDCKTILKALGSGATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---N--TN---IMMQRGNFRDHKRIVKCFNCGKQGHIAKNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRP-------- [...]
+>Ref.F2.CM.95.95CM_MP257.AJ249237
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRLKHIVWASRELKRFALNPGLLET-TEGCKKIIGQLQP-SLQTGSEELKSLFNTIVVLYYVH-QKIEVRDTKEALDKLQEE-QDKHQ---QKTQQA--TADKGVSK------GVSQNYPILQNLQ-GQMVHQSLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQIAWMTSN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPSHKARILAEAMS--KA--T---G--AA---IMMQKSNFKGQRRIVKCFNCGKEGHIARNCRAPRKRGCWK-CGQEGHQMKDCTERQANFLGKMWPSNKGRPGNFLQNRP-------- [...]
+>Ref.F2.CM.95.95CM_MP255.AJ249236
+MGARASVLSGGKLDAWEKIRLKPGGKKRYRLKHLVWASRELERFALNPSLLET-TEGCKKIIGQLQS-SLQTGSEELKSLYNAVVVLYYVH-QRIDVRDTKEALDKLQEE-QDKSQ---QKEQQK--AADKE----------VSQNYPIVQNIQ-GQMVHQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPIPPGQMREPRGSDIAGTTSTLQEQITWMTGN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTETLLVQNSNPDCKTILKALGPGATLEEMMTACQGVGGPGHKARILAEAMS--KA--T---S--TA---IMMQKSNFKGQKRIVKCFNCGKEGHIARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSNRGRPGNFLQNRP-------- [...]
+>Ref.C.BR.92.BR025_d.U52953
+MGARASILRGGKLDAWERIKLKPGGKKHYMMKHLVWASRELERFALDPGLLET-SEGCKQIMKQLQP-ALQTGTKELISLHNTVATLYCVH-EKIDVRDTKEALDKIKEE-QNKSQ---QKTQQA-EAADKG---------KVSQNYPIVQNLQ-GQMVHQPISARTLNAWVKVVEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQMREPRGSDIAGTTSTLQEQITWMTNN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQDVKNWMTDTLLVQNANPDCKTILRALGPGASLEEMMTACQGVGGPGHKARVLAEAMS--KV--N---N--TN---IMMQRSNCKGPKRTIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQVKDCTERQANFLGKIWPSHRGRPGNLLQNRT-------- [...]
+>Ref.G.KE.93.HH8793_12_1.AF061641
+MGAR?S?LSGGKLDAWEKI?LTPGGKKKYRLKHLVWASREMERFALNPGLLET-AEGCQQIMSQLQP-AI?TGTEEIKSLFNTVATLYCVHP-KIEVKDTKEALEEVEKI-QKKSQ---QKIQQA--ARDEGNSS------QVSQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRIHPPQAGPIPPGQIREPRGSDIAGTTSNLQEQIRWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTDTLLVQNANPDCKTILRALGPGATLEEMMTACQGVGGPGHKARVLAEAMS--QA--T---G--AAA-AIMMQKSNFKGPKRNIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANF!GKIWPSNKGRPGNFLQNRP-------- [...]
+>Ref.A2.CD.97.97CDKTB48.AF286238
+MGARASVLSGGKLEAWEKIRLRPGGKKKYRLKHLVWASRELEKFSINPSLLET-ETGCRRIFGQLQP-ALETGTEELRSLYNTIAVLYFVH-QKIEVKDTKEALDKIEEE-QNKCK---QKTQQA--AADTGSSSSQNYRGSSSQNYPIVQNAQ-GQMVHQAVSPRTLNAWVKVVEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVQAGPIPPGQMREPRGSDIAGATSNLQEQIGWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCKSILRALGPGATLEEMMTACQGVGGPGHKARVLAEAMS--QV--Q---N--TN---IMIQRGNFKGQKR-IKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFPQSRT-------- [...]
+>Ref.G.BE.96.DRCBL.AF084936
+MGARASVLSGGKLDAWEKIRLRPGGKKRYRMKHLVWASRELDRFALNPGLLET-AEGCQKIMAQLQP-ALQTGTEEIKSLFNTVATLYCVH-QKIEVRDTKEALEEVEKI-QKKSQ---QKENSS--S-------------QVSQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKETINDEAAEWDRLHPQQAGPIAPGQIRDPTGSDIAGATSTLQEQIRWMTSN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKSWMTDTLLIQNANPDCKIILKGLGQGATLEEMMTACQGVGGPSHKARVLAEAMS--QA--S---G--AAA-AIMMQKSNFKGPRRTIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKECTERQANFLGKIWPSNKGRPGNFLQNRP-------- [...]
+>Ref.K.CD.97.97ZR_EQTB11.AJ249235
+MGARASVLSGGKLDKWEKIQLRPGGKKKYRLKHLVWASRELERFALNPNLLET-VEGCRQIIRQLQP-SLQTGSEELRSLFNTVATLYWVH-QSIQVRDTKEALDKLEEE-QNRTQ---QKTQQG--KADKG----------VSQNYPIVQNLQ-GQMVHQALSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRMHPVQAGPIPPGQIREPRGSDIAGTTSTLQEQITWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFRVLRAEQATQEVKNWMTETLLVQNANPDCRTILKALGSGATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---N--SA---VMMQRGNFKGQRRIIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCSERQANFLGKFWPLNKERPGNFLQNRP-------- [...]
+>Ref.D.TZ.01.A280.AY253311
+--ARASVLSGGQLDAWEKIRLRPGGKKKYQLKHIVWASRELERFALNPGLLET-SEGCKQI!---QP-AIQTGSEELKSLFNTVATLYCVH-RKIEVKDTKEALEKLEEE-QTKSK---KKAQQA--TADTGSSS------QVSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIAWMTNN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFYKTLRAEQASQDVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPSHKARVLAEAMS--QA--T---N--VNA-AIMMQRGNFKGPRKIIKCFNCGKEGHIAKNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRP-------- [...]
+>Ref.F2.CM.02.02CM_0016BBY.AY371158
+--ARASVLSGGKLDDWEKIRLRPGGKKKYRLKHIVWASKELERFALNPGLLET-TEGCKQIIGQLQ-SSLQTGSEEIKSLYNTVAVLYYVH-QKIQIRDTKEALDKLQEE-QDKYQ---QKTQPA--AADKG----------VSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVQAGPIPPGQIREPRGSDIAGTTSTLQEQIAWMTSN---PPVPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTDTLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPGHKARILAEAMS--QV--T---A--TS---VLMQKSNFKGQKRIVKCFNCGKEGHIAKNCRAPRKRGCWK-CGKEGHQMKDCTERQANFLGKIWPSNKGRPGNFIQSRP-------- [...]
+>Ref.F1.BE.93.VI850.AF077336
+MGARASILSGGKLDEWEKIQLRPGGKKRYKMKHLIWASRELERFALDPGLLET-SEGCQKIIRQLQP-SLQTGSEELKSLFNTVAVLYYVH-QRAGVTDTKEALDKLEEE-QNKSQ---QKTQQA--AADKG----------VSQNYPIVQNLQ-GQMVHQSLSPRTLNAWVKVIEEKAFSPEVIPMFSALSEGATPTDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPAPPGQMREPRGSDIAGTTSTLQEQIQWMTGN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIKQGPKEPFRDYVDRFFKVLRAEQASQDVKGWMTDTLLVQNANPDCKTILKALGTGATLEEMMTACQGVGGPSHKARVLAEAMS--QA------N--SA---IMMQKSNFKGQRRVVKCFNCGKEGHIARNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSNKGRPGNFLQSRP-------- [...]
+>Ref.B.NL.00.671_00T36.AY423387
+MGARASVLSGGELDRWEKIRLRPGGKKRYKLKHIVWASRELERFAVNPGLLET-SEGCRQILGQLQP-ALQTGSEELKSLFNTVATLYCVH-ARIEVKDTKEALEKIEEE-QNKSKKRAQQAQQA--EADAGKNN------PVSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNN---PPIPVGEIYKRWIILGLNKIVRMYSPTSILDIKQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNSNPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---S--AP--AIMMQRGNHRNQRRTVKCFNCGKEGHIARNCRAP?KKGCWK-C?K?GHQMKDCT?RQA?FLGKIWPSHKGRPGNFLQSRPEPTAPSQS [...]
+>Ref.C.IN.95.95IN21068.AF067155
+MGARASILRGGKLDKWEKIRLRPGGKKRYMLKHLVWASRELDRFAVNPGLLET-AEGCKQIIKQLQP-ALQTGTEELRSLFNTVATLYCVH-AGIEVRDTKEALDKIEEE-QNKIK---QKTQQA--KEDDG---------KVSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFTALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVPAGPIAPGQLREPRGSDIAGTTSTLQEQIAWMTNN---PPVPVGDIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQDVKNWMTETLLVQNANPDCKTILRALGPGASLEEMMTACQGVGGPSHKARVLAEAMS--Q---T---N--SA---ILMQRSNFKGSKRIVKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRP-------- [...]
+>Ref.B.US.98.1058_11.AY331295
+----ASVLSGGKLDTWEKIRLRPGGKKKYKLKHIVWASRELERFALNPGLLET-AEGCRQLLGQLQP-SLQTGSEELKSLFNTIATLYCVH-QRIEVRDTKEALDKIEEE-QNKSK---KKAQQAAAAADTGNSS------QVSQNYPIVQNLQ-GQMVHQAISPRTLNAWVKVIEEKAFSPEVIPMFAALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRIHPAQAGPIAPGQIRDPRGSDIAGTTSTLQEQITWMTNN---PPIPVGEIYKKWIIMGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---N--SG--AIMMQKGNFRNQ--VVRCFNCGKVGHIAKNCRAPRKKGCWK-CGKEGHQMKDCDQRQANFLGKIWPSHKGRPGNFLQSRP-------- [...]
+>Ref.J.CD.97.J_97DC_KTB147.EF614151
+--------------AWEKIWLRRRGEKKYRRKHIVWASRELDRYALNPGLL!S!AEGGEQILVQIQP!SL!TGAEEIKSLFNTEATLYCVH!!RIDIRDTKEALDKIEE!L!NKSK---EKAKKE--AVKKYNS-------QVSHNYPILQNM!!GELVHQALSPRTLNAWVKVIEEKGFNPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINEEAAEWDRLHPVHAGPVAPGQMREPRGSDIAGTTSNLQEQIAWITGNGGNQPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKNWMTDTLLVQNANPDCKTILRALGAGATLEEMMTACQGVRVSSHRARVFGEAIS--QV--N---N--TN---IIMQRGNFRGQKRSVKCFNCGKEGHIAKNCRAPRKKGC!K!CGKEGHQIKNCTERQANFLGKVWPSSKGRPGNFLQSRP-------- [...]
+>Ref.J.CM.04.04CMU11421.GU237072
+MGARASVLSGGKLDTWEKIRLRPGGKKRYRLKHLVWASRELDRFALNP?LLET-SKGCQQILVQLQP-SYQTGTEEIKSLYNTVATLYCVH-E?IEVKDTKE?LDKIEEL-QKKNK---QQAQKA--E?DK???S------QVSQNYPIVQNMQ-GQPV??ALSPRTLNAWVKV?E?KA?SPEVIPMFS?LSEGAT?QDLNTMLNT?GGH?AAMQMLKDTINEEAAEWDR?HPVHAGP?APGQAREPRGSDIAGTTSTLQEQIAWMTGN---PPIPVGEIYK?WIILGLNKI?RMYSPVSILDIRQGPK?PFR?YVDRFFKTLRAEQATQEVKNWMTDTLLIQNANPDCRT?LKALGPGATL?EMMTA?QGVGGPGHKARVL?EAMS--QM--T---N--TA--NIMMQ?G?FKG?KRMIKCFNC?KPGHL?R?CRAP??KGCWK-CGQEG?QMKDCTERQANFLG?IWPSN?GRPGNF?QS?P-------- [...]
+>Ref.H.GB.00.00GBAC4001.FJ711703
+MGARASVLSGGKLDAWEKIRLRPGGKKKYRLKHLVWASRELDRFALNPDLLET-ADGCLKI?GQIQP-ALQTGTEEIKSLFNLVAVLYCVH-QKIEVQDTSEALNKVKEI-QNKNQ---QTTQQA--TA?KEKDS------KISQNYPIVQNAQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALS?G?TPQD?NTMLNSVGGHQAAMQ?LKDTINEEAAEWDRTHPVHAGPIPPGQMREPRGSDIAGTT?NLQEQIAWMT?N---PP?PV??IYKRWII?GLNKIVRMYSPVSILDIKQGPKEPFR?YVDR?FKTLRAEQATQDVKNWMTDTLLVQNANPDCKTILKALGQGASIEEMMTACQGVGGPGHKARVLAEAMS--QM--T---N--MNT-AIMVQKGNFKGQRRTVKCFNCGKEGHIARNCRAPRKKGCWK-CGREGHQMKDCTERQANFLGKIWPSSKGRPGNFLQSRP-------- [...]
+>Ref.G.NG.92.92NG083.U88826
+VGARASVLSGGKLDSWEKIRLRPGGRKKYKLKHIVWASRELGRFALNRDLLET-AEGCVQIMKQLQP!SL!TGTEELRSLFNTVATLYCVH-QKIEVKDTKEAPEEVEKI-QKNSQ---QEIQQA--AKNEGNSN------PVSQNYPIVQNAQ-GQMIHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKDTINDEAAEWDRIHPQQAGPIPPGQIREPSGSDIAGTTSTLQEQIRWMTSN---PPIPVGEIYKRWIILGLNKIVRMYSPVSILDIRQGPKEPFRDYVDRFFKTLRAEQATQEVKGWMTDTLLVQNANPDCKTILRALGPGATLEEMMTACQGVGGPSHKARVLAEAMS--QA--S---G--AAAAAIMMQKSNFKGPRRIIKCFNCGKEGHLARNCRAPRKKGCWK-CGKEGHQMKECTERQANFLGKIWPSNKGRPGNFLQNRT-------- [...]
+>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455
+MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLET-SEGCRQILGQLQP-SLQTGSEELRSLYNTVATLYCVH-QRIEIKDTKEALDKIEEE-QNKSK---KKAQQA--AADTGHSN------QVSQNYPIVQNIQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNN---PPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---N--SA--TIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWK-CGKEGHQMKDCTERQANFLGKIWPSYKGRPGNFLQSRP-------- [...]
+>Ref.B.TH.90.BK132.AY173951
+MGARASVLSGGQLDRWEKIRLRPGGKKKYRLKHIVWASRELERFAVNPGLLET-SEGCRQILGQLQP-SLQTGSEELRSLYNTIAVLYCVH-QKIEVKDTKEALEKIEEE-QNKSK---KKAQQA--AANTENSS------QVSQNYPIVQNMQ-GQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHN---PPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVLAEAMS--QV--T---N--SA--TIMMQKGNFRNQRKIVKCFNCGKEGHIARNCRAPRKKGCWR-CGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRP-------- [...]
diff --git a/Examples/Data/HIV1_REF_2010_gag_macse_DNA.fasta b/Examples/Data/HIV1_REF_2010_gag_macse_DNA.fasta
new file mode 100644
index 0000000..a230e43
--- /dev/null
+++ b/Examples/Data/HIV1_REF_2010_gag_macse_DNA.fasta
@@ -0,0 +1,78 @@
+>Ref.D.CD.83.ELI.K03454
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATAAATGGGAAAAAATTCGGTTACGGCCAGGAGGAAAGAAAAAATATAGACTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATATGCACTTAATCCTGGCCTTTTAGAAACA---TCAGAAGGCTGTAAACAAATAATAGGGCAGCTACAACCA---GCTATTCAGACAGGAACAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTACAT---AAAGGAATAGATGTAAAAGACACCAAGGAAGCTTTAGAAAAGATGGAGGAAGAG---CAAAACAAAAGTAAG---------AAAAAGGCACAGCAAGCA------GCAGCTGACACAGGAAACAACAGC------------------CAGGTCAGCCAAAATTATCCTATAGTGCAGAACCTACAG---GGGCAAATGGTACATCAGGCCATATCA [...]
+>Ref.C.ET.86.ETH2220.U46016
+ATGGGTGCGAGAGCGTCAATATTAAGAGGCGAAAAATTAGATGCCTGGGAAAAAATTAAGTTAAGGCCAGGGGGAAAGAAACACTATATGCTGAAACACCTAGTCTGGGCAAACAGGGAGCTGGAAAAATTTGCACTTAACCCTGACCTTTTAGATACA---TCAGCAGGCTGTAAACAAATAATTAAACAGCTACAACCA---GCTCTTCAGACAGGAACAGAGGAACTTAAATCATTATTTAATACAGTGGCAACTCTCTATTGTGTACAT---CAAAAGATAGAGATAAAAGACACCAAGGAAGCCTTAGACAAGATAGAGGAAGAA---CAAAACGAAAGTCAG---------CAAAAAACACAGCAGGCA---GGAGCAGCTGACAGAGGA---------------------------AAGGACAGTCAAAATTATCCTATAGTGCAGAATATGCAG---GGGCAAATGGTACATCAGCCCATATCA [...]
+>Ref.C.ZA.04.04ZASK146.AY772699
+ATGGGTGCGAGAGCGTCAGTATTAAGAGGCGAAAAATTAGATACATGGGAAAAAATTAGGTTAAGGCCAGGGGGAAAGAAACACTATATGCTAAAACACATAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTCAACCCTGGCCTTTTAGAAACA---TCAGAAGGCTGTAAACAAATATTGGCACAAATACAACCA---GCTATTCAGACAGGAACAGAGGAACTTAAATCATTATTCAACACAATAGCAGTTCTCTATTGTGTACAT---AAAAAGATAGATGTAAGAGACACCAAGGAAGCCTTAGACAAGATAGAGGAAGAG---CAAAACAAAAGTCAG---------CAAAAAACACAGCAGGCA---AAAGCGGCTGACGAA------------------------------AAGGTCAGTCAAAATTTTCCTATAGTACAGAATCTTCAA---GGGCAAATGGTACATCAACCCCTATCA [...]
+>Ref.A1.UG.92.92UG037.AB253429
+ATGGGTGCGAGAGCGTCAGTATTAAGTGGGGGAAAATTAGATGCATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATCTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTAGCCTTTTAGAAACA---ACAGAAGGATGTCAACAAATAATGGAACAATTACAATCA---GCTCTCAGAACAGGAACAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGCGTACAT---CAACGGATAGAGGTAAAAGACACCAAGGAAGCTCTAGATAAAATAGAGGAGATA---CAAAAGAAAAGCAAG---------CAAAAGACACAGCAGGCA------GCAGCTGACACAGGAAGTAGCAGC------------------AAGGTCAGCCAAAATTACCCTATAGTGCAAAATGCACAA---GGGCAAATGATCCACCAGTCCTTGTCA [...]
+>Ref.A1.RW.92.92RW008.AB253421
+ATGGGTGCGAGAGCGTCAGTATTAAGTGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGAATGAAACATCTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCTGGCCTTTTAGAAACA---ACAGAAGGATGTCAAAAAATAATAGAACAGTTACAACCA---TCTGTCAAGACAGGAACAGAAGAACTTAAATCATTATTTAATACAGTAGCAACCCTCTATTGCGTACAT---CAACGGATAGATGTAAAAGACACCAAGGAAGCCCTAGATAAAATAGAGGAAATG---CAAAATAAGAGCAAG---------CAAAAGACACAACAGGCA------GCAGCTGACATAGGAAATAGCAGC------------------AAGGTCAGCCAAAATTACCCTATAGTGCAAAATGCACAA---GGGCAAATGATATATCAGTCCATGTCA [...]
+>Ref.H.BE.93.VI997.AF190128
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACGATTAGATACTTTGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATATAGTATGGGCAAGCAGAGAGCTGGAAAGATTTGCACTTAACCCCGGCCTTTTAGAATCA---GCAGAAGGCTGTCTACAAATAATAGAACAACTACGGCCA---TCTATTAAGACAGGAACAGAAGAACTTCWATCATTATTTAATACCGTAGCGACCCTCTATTGCGTACTT---CAAAGAATAGAGGTAAAAGACACCAAGGAAGCTTTAGGGAAGATAGAGGAAATA---CAAAACAAAAGGCAG---------CAAAAAACACAGCAAGCA------ACAGCTAATAAGGAAAGAGACAAC------------------AAGGTCAGTCAAAATTATCCTATAGTACAGAATGCTCAA---GGGCAGATGGTACACCAGCCCATATCA [...]
+>Ref.H.BE.93.VI991.AF190127
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGAGAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAATATAGGCTAAAACATCTGGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCCGACCTTTTAGAAACA---GCAGATGGCTGCCAACAAATACTAGGACAGCTACAGCCA---GCTCTTAAGACAGGAACAGAAGACCTTCAATCATTATATAATACAATAGCAGTCCTCTATTGCGTACAT---CAAAGAATAGATGTGAAAGACACCAAGGAAGCTTTAGGGAAGATAGAGGAAATA---CAGAATAAGAACAAG---------CAAAGAACACAGCAGGCCCCAGCAGCAGCTGATAAAGAAAAGGACAGC------------------AAGATCAGTCAAAATTATCCTATAGTACAGAATGCCCAG---GGGCAAATGGTACACCAGGCAATATCA [...]
+>Ref.G.PT.x.PT2695.AY612637
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAAATATAGAATGAAACATTTAGTATGGGCAAGCAGGGAGCTGGAAAAATTTGCACTCAACCCTGACCTTTTAGAAACA---GCAGAAGGTTGTCAACAAATAATGAGACAGTTACAACCA---GCTCTCCAGACAGGAACAGAGGAGCTTAGATCATTATTTAATACAGTAGCAACACTCTATTGTGTACAT---CAAAGGATAGAGGTAAAAGACACCAAAGAAGCTCTAGAGGAAGTGGAAAAGACA---CAGAAGAAAAGT------------CAGAAACAACAGCAGGCA------GCAATGGACGAAGGAAACAGCAGC------------------CAAGTCAGCCAAAATTATCCTATAGTGCAGAATGCACAA---GGGCAAATGGTACATCAGGCCATATCA [...]
+>Ref.K.CM.96.96CM_MP535.AJ249239
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGAAAAGATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAAACTGAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTCAACCCTGGCCTTTTAGAGACA---ACAGAAGGCTGTCGGCAAATAATAACACAAATACAGCCA---TCCATTCAAACAGGATCAGAAGAGATTAAATCACTATATAATACAATAGCAGTCCTCTATTTTGTACAT---CAAAAGATAGAGGTAAAAGACACCAAGGAAGCCTTAGACAAACTAGAGGAAGAA---CAAAACAAAAGTCAG---------CGAAAGACACAACAAGAA------GCAGCTGACAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTGCAG---GGGCAAATGGTACACCAGGCCCTATCA [...]
+>Ref.F2.CM.97.CM53657.AF377956
+---------AGAGCGTCACTATTAAGCGGGGGAAAATTAGATGATTTGGAAAAAATTCGGTTAAGGCCAGGGGGGAAGAAAAAATATAGGCTGAAACATATAGTATGGGCAAGCAGGGAGCTAGAAAGATTTGCACTTAATCCTGGCCTTTTAGAGACAAAG---GAAGGCTGTAAACAAATAATAGGACAACTACAACCA---TCCCTTCAGACAGGATCAGAAGAGCTTAAATCATTATTCAACACAATAGTAGTCCTCTATTATGTACAT---CAAAGGATAAAAATAGGAGACACCAAGGAAGCTTTAGATAAGCTACAGGAAGAA---CAAGACAAAAGTCAG---------CAAAAAACACAACCAGCA------GCGGCTGACAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGACAAATGGTACACCAGTCTCTATCA [...]
+>Ref.F1.FI.93.FIN9363.AF075703
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAACTAGATGCATGGGAAAAAATTCGGTTAAGGCCGGGGGGAAAGAAACAATATAGAATAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCAATAGATCCTGGCCTTCTAGAAACA---TCAGAAGGCTGTCAAAAAATAATAGCACAGATACAGCCA---TCCATTCAGACAGGATCAGAAGAGCTTAGATCATTATATAACACAATAGCAGTCCTCTATTTTGTACAT---CAAAAGATAGAGGTAAAGGACACCAAGGAAGCTTTAGATAAGCTAGAGGAAGAA---CAAAACAAAAGTCAG---------CAAAAGACACAGCAAGCGGCAGCTGCAGCTGACAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGACAAATGGTACATCAGGCTATATCA [...]
+>Ref.D.UG.94.94UG114.U88824
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGAGGAAAATTAGATGAATGGGAAAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGACTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCACTTAATCCTGGCCTTTTAGAAACA---TCAGAAGGCTGTAGACAAATAATAAGACAGCTACAACCA---TCTATTCAGACAGGATCAGAGGAAATTAAATCATTATATAATACAGTGGTAACCCTCTATTGTGTACAT---GAGAGGATAAAGGTAGCAAGCACCAAGGAAGCTTTAGACAAGATAGAGGAAGAA---CAAGCCAAAAGTAAG---------AAAAAAGCACAGCAAGCA------ACAGCTGACACAAGAAACAGCAGC------------------CAGGTCAGCCAAAATTATCCTATAGTGCAAAACCTACAG---GGGCAAATGGTACACCATCCCCTATCA [...]
+>Ref.F1.BR.93.93BR020_1.AF005494
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGAAAAAATTCGGTTAAGGCCGGGGGGAAAGAAAAAATATAGACTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTGATCCAGGCCTTCTAGAAACA---TCAGAAGGCTGTCGAAAAATAATAGGACAGTTACAACCA---TCCCTTCAGACAGGATCAGAAGAGCTCAAATCATTATATAATACAATAGCAGTCCTCTATTATGTACAT---CAAAAGGTAGAGGTAAAAGACACCAAGGAGGCTTTAGAGAAGCTAGAGGAAGAA---CAAAACAAAGGTCGG---------CAAAAGACACAGCAAGCG------ACTGCTGAAAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGACAAATGGTACACCAGTCTTTATCA [...]
+>Ref.D.CM.01.01CM_4412HAL.AY371157
+------GCGAGAGCGTCAATATTAAGCGGGGGAAAATTGGATGCATGGGAAAAAATTCGGTTACGGCCAGGGGGAAGCAAAAAGTATAGGCTAAAACATCTAATATGGGCAAGCAATGAGCTAGAACGATTTGCACTTAATCCTGGCCTTTTAGAGACA---TCAGATGGCTGTAAACAAATACTAGGCCAGCTACAACCA---GCTCTTAAAACAGGAACAGAAGAACTTAGATCATTATTTAATGCAGTAGCAGTACTCTATTGTGTACAT---GAAAGGATAGAGGTAAAGGACACCAAGGAAGCCTTAGACAAGATAGAGGAAGAA---CAAAACAAAAGTAAG---------AAAAAAGCACAGCAAGCA------GCAGCTGACACAGGGGACAACAAA------------------CAGGTCAGCCAAAATTATCCTATAGTGCAGAACTTACAG---GGGCAAATGGTACACCAAGCCCTATCA [...]
+>Ref.F1.FR.96.96FR_MP411.AJ249238
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGAAAGAATTCGATTAAGACCGGGGGGAAAGAAAAAATATAGAATGAAGCATCTAGTATGGGCAAGCAGGGAGTTAGAACGATTTGCAGTTGATCCTGGACTTCTAGAAACA---CCAGAAGGCTGTAAGCAAATAATAAGACAGCTACAACCA---TCCCTTCAGACAGGATCAGAAGAGCTTAGATCATTGTTCAATACAGTAGCAGTTCTCTATTGTGTACAT---CAAAAGATAGAGATAAAGGACACCAAGGAAGCTTTAGAGAAGTTAGAGGAGGAA---CAAAACAAAGGTCAG---------CAAAAGACACAGCAAGCG------GCAGCTGACAAAGGA------------------------------GTCAGTCAAAATTACCCTATAGTACAAAATCTTCAG---GGACAGATGGTACATCAGCCTATATCA [...]
+>Ref.A2.CY.94.94CY017_41.AF286237
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGCTTGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAACATTTGGTATGGGCAAGCAGGGAGCTGGAGAAATTCTCAATTAACCCTGGCCTTTTAGAAACA---CCAGAGGGATGTAGACAAATAATAAGGCAGTTACAACCA---GCTCTCCAAACAGGAACAGAAGAACTTAAATCATTATATAATACAGTAGTAGTCCTCTACTGGGTACAT---CAAAGGGTAGATGTAAAAGACACCAAGGAAGCTCTAGATAAAATAGAGGAAGAA---CAAAACAAG---------------CAGAAAACACAGCATGCA------GCAGCTGACACAGGGAACAGCAGC------------------------AGTCAAAATTATCCCATAGTGCAAAATGCACAA---GGGCAAATGGTACACCAGGCTATATCA [...]
+>Ref.H.CF.90.056.AF005496
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATGCTTGGGAGAAAATTCGGCTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATCTAGTATGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTAACCCCGGCCTTTTAGAAACA---CCAGAAGGCTGTCTACAGATAATAGAACAGATACAGCCA---GCTATTAAGACAGGAACAGAAGAACTTAAATCATTATTTAATCTAGTAGCAGTCCTCTATTGCGTACAT---CGAAAAATAGATGTGAAAGACACCAAGGAGGCTTTAGATAAGATAGAGGAAATA---CAAAACAAAAGTCAG---------CAAAAAACACAGCAAGCA------GCAGCTGATAAGGAAAAAGACAAC------------------AAGGTCAGTCAAAATTATCCTATAGTACAGAATGCTCAA---GGGCAGATGGTACACCAGGCCATATCA [...]
+>Ref.A2.CM.01.01CM_1445MV.GU201516
+------GCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGAGAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAATATAGAATGAAACATTTAGTATGGGCAAGCAGGGAGCTGGAAAAATACTCAATCAACCCTGGTCTTTTAGAAACA---TCGGAAGGATGTAAACAAATAATAAGGCAGTTACATTCA---GCTCTCCCAGTAGGAACAGAAGAACTTAAATCACTATATAATACAATAGCAGTCCTCTACTATGTACAT---CAAAAAATAGAGGTAAAAGACACCAAGGAAGCCCTAGATAAATTAGAGGAGGAG---CAAAACAAATACAAG---------CAGAAGACACAGCAGGCA------GCAGCTGCCACAGGAAATAGCAGC------------------------AGTCAGAATTATCCCATAGTGCAAAATGCACAA---GGGCAAATGGTGCACCAGGCCATATCG [...]
+>Ref.A1.AU.03.PS1044_Day0.DQ676872
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAGATTAGATGCATGGGAGAAAATTCGGCTAAGGCCAGGGGGAAAGAAAAAATATAGACTAAAACATCTAGTATGGGCAAGCAGGGAGCTGGAGAGATTCGCACTTAAYCCTRGCCTTTTAGAATCA---GCAGAAGGATGTCAACAAATAATGGAACAGTTACAACCA---GCTCTYAAGACAGGAWCAGAAGAAATTAAATCATTATTTAATACAGTAGCAACCCTCTATTGTGTACAT---CAAAGGATAGATGTAAAAGACACCAAGGAAGCTYTAGATAAAATAGAGGAAATA---AAAAATAAGAGCAAG---------CAAAGGACTCAACAGGCA------GCAGCTGACACAGGAAACAGCGGC------------------AAGGTCAGCCAAAATTACCCTATAGTGCAAAATGCACAG---GGGCAAATGATACAYCAAAACTTGTCA [...]
+>Ref.J.SE.93.SE9280_7887.AF082394
+ATGGGTGCGAGAGCGTCAATATTAAGTGGGGGAAAATTAGATGATTGGGAAAAAATTCGGTTGAGGCCAGGGGGGAAGAAAAAATATAGGATAAAGCATCTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGGCCTTCTAGAGTCA---GCAAAAGGCTGTCAACAAATACTAGTACAGCTCCAACCA---GCTCTCCAGACAGGAACACAAGAAATTAAATCATTGTATAATACAGTAGCAACCCTCTATTGCGTACAT---CAGAGGATAGAAATAAAAGACACCATGGAAGCTTTAGAGAAGATAGAGGAAATT---CAAAACAAGAACAAA---------CAGCAGGCACAGAAAGCA------GAAACTGACAAAAAAGACAACAGT------------------CAGGTCAGTCAAAATTATCCTATAGTGCAGAATCTGCAA---GGGCAACCGGTACACCAGGCCCTATCA [...]
+>Ref.F2.CM.95.95CM_MP257.AJ249237
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCGGGGGGGAAGAAAAAATATAGGCTGAAACATATAGTATGGGCAAGCAGGGAGCTAAAACGATTTGCACTTAATCCTGGCCTTTTAGAGACA---ACAGAAGGCTGTAAGAAAATAATAGGACAACTACAACCA---TCCCTTCAGACAGGGTCAGAGGAACTGAAATCATTATTTAACACAATAGTAGTTCTCTATTATGTACAT---CAAAAGATAGAGGTAAGAGACACCAAGGAAGCTTTAGATAAGCTACAGGAAGAA---CAAGACAAACATCAG---------CAAAAAACACAACAAGCA------ACGGCTGACAAAGGGGTCAGTAAA------------------GGGGTCAGTCAAAATTACCCTATACTACAAAATCTTCAG---GGGCAAATGGTACACCAGAGTCTATCA [...]
+>Ref.F2.CM.95.95CM_MP255.AJ249236
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGGTTAAAGCCGGGGGGAAAGAAAAGATATAGGCTAAAACATCTAGTATGGGCAAGCAGGGAACTAGAACGATTTGCACTTAATCCTAGCCTTTTAGAAACA---ACAGAAGGCTGTAAGAAAATAATAGGACAATTACAATCA---TCCCTTCAGACAGGATCAGAAGAGCTTAAATCACTATACAATGCAGTAGTAGTTCTCTATTATGTACAT---CAAAGGATAGATGTAAGAGACACCAAGGAAGCTTTAGATAAGCTACAGGAAGAA---CAAGATAAAAGTCAG---------CAAAAGGAACAACAAAAG------GCGGCTGACAAAGAG------------------------------GTCAGTCAAAATTACCCTATAGTGCAGAATATTCAG---GGGCAAATGGTACACCAGGCTCTATCA [...]
+>Ref.C.BR.92.BR025_d.U52953
+ATGGGTGCGAGAGCGTCAATATTAAGAGGCGGAAAATTAGATGCTTGGGAAAGAATTAAGTTAAAGCCAGGGGGAAAGAAACACTATATGATGAAACACCTAGTCTGGGCAAGCAGGGAGCTGGAAAGATTTGCACTTGACCCTGGCCTTTTAGAGACA---TCCGAAGGCTGTAAACAAATAATGAAACAGCTACAACCA---GCTCTTCAGACAGGAACAAAGGAACTTATATCATTACATAATACGGTTGCAACTCTCTATTGTGTACAT---GAAAAGATAGATGTACGAGACACCAAGGAAGCCTTAGACAAAATAAAGGAAGAA---CAAAACAAAAGTCAG---------CAAAAAACACAGCAGGCA---GAAGCGGCTGACAAAGGA---------------------------AAGGTCAGTCAAAATTATCCTATAGTACAGAATCTCCAA---GGGCAAATGGTACACCAGCCCATATCA [...]
+>Ref.G.KE.93.HH8793_12_1.AF061641
+ATGGGTGCGAGAGCSTCASTATTAAGCGGGGGAAAATTAGATGCATGGGAAAAAATTCGSCTGACGCCAGGGGGAAAGAAAAAATACAGACTGAAACATCTAGTATGGGCAAGCAGAGAGATGGAGAGATTTGCACTTAACCCTGGCCTTTTAGAAACA---GCAGAAGGTTGTCAACAAATAATGAGCCAGTTGCAACCA---GCTATCCAMACAGGAACAGAGGAGATTAAATCATTATTTAATACAGTAGCAACCCTCTATTGTGTACATCCC---AAGATAGAGGTAAAGGACACCAAAGAAGCTCTAGAGGAAGTAGAAAAGATA---CAAAAGAAAAGTCAG---------CAAAAAATACAGCAGGCA------GCAAGGGATGAAGGAAACAGCAGC------------------CAAGTCAGCCAAAATTATCCTATAGTGCAGAACGCACAA---GGACAGATGGTACACCAGGCCATATCA [...]
+>Ref.A2.CD.97.97CDKTB48.AF286238
+ATGGGTGCGAGAGCGTCAGTATTGAGCGGCGGAAAATTAGAAGCTTGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGACTGAAACATTTAGTATGGGCAAGCAGGGAGCTGGAAAAATTCTCAATCAACCCCAGCCTTTTAGAAACA---GAAACAGGATGTAGACGAATATTTGGGCAATTACAACCA---GCTCTCGAGACAGGAACAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTACTTTGTTCAT---CAAAAGATAGAGGTAAAAGACACCAAGGAAGCTCTAGATAAAATAGAGGAAGAA---CAAAACAAATGCAAG---------CAGAAGACACAGCAGGCA------GCAGCTGACACAGGAAGCAGCAGCAGTCAAAATTACAGAGGTAGCAGCAGTCAAAATTACCCTATAGTGCAAAATGCACAA---GGGCAAATGGTACACCAGGCCGTGTCA [...]
+>Ref.G.BE.96.DRCBL.AF084936
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGAGAAAATTCGGTTGAGGCCAGGGGGAAAGAAAAGATATAGAATGAAACATTTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGGCCTTTTAGAAACA---GCAGAAGGTTGTCAAAAAATAATGGCACAGTTGCAACCA---GCTCTCCAAACAGGAACAGAGGAGATTAAATCACTATTTAATACAGTAGCAACCCTCTATTGTGTACAT---CAAAAGATAGAGGTAAGAGACACCAAAGAGGCTCTAGAGGAAGTGGAAAAGATA---CAAAAGAAGAGTCAG---------CAAAAAGAAAACAGCAGC------AGC---------------------------------------CAAGTCAGTCAAAATTACCCTATAGTGCAGAATGCACAA---GGGCAAATGGTACACCAGGCCATATCA [...]
+>Ref.K.CD.97.97ZR_EQTB11.AJ249235
+ATGGGTGCGAGAGCTTCAGTATTAAGCGGGGGAAAATTAGACAAATGGGAAAAAATTCAGTTACGGCCAGGGGGAAAGAAAAAATACAGGCTAAAACATCTAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTAACCCTAACCTTTTAGAGACA---GTAGAAGGCTGTCGGCAAATAATAAGACAACTACAACCA---TCCCTTCAAACAGGCTCGGAAGAGCTTAGATCACTATTTAATACAGTAGCAACCCTCTATTGGGTGCAT---CAAAGTATACAGGTAAGGGACACCAAGGAAGCCTTAGACAAACTAGAGGAAGAA---CAAAACAGAACTCAG---------CAAAAGACACAGCAAGGA------AAAGCTGACAAAGGG------------------------------GTTAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGGCAAATGGTACACCAGGCCCTATCA [...]
+>Ref.D.TZ.01.A280.AY253311
+------GCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATGCATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATCAACTAAAACATATAGTATGGGCAAGCAGGGAGTTAGAACGATTTGCACTTAATCCTGGCCTTTTAGAGACA---TCCGAAGGCTGTAAACAAATC!TA---------CAACCA---GCTATTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCAT---AGAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAAAAATTAGAGGAAGAG---CAAACCAAAAGTAAG---------AAAAAGGCACAGCAAGCA------ACAGCTGACACAGGAAGCAGCAGC------------------CAGGTCAGCCAAAATTATCCTATAGTGCAAAACCTACAG---GGGCAAATGGTACACCAGGCCATATCA [...]
+>Ref.F2.CM.02.02CM_0016BBY.AY371158
+------GCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGACTGGGAGAAAATTCGGTTAAGGCCGGGAGGGAAGAAAAAATATAGGCTAAAACATATAGTATGGGCAAGCAAGGAGCTAGAACGATTTGCACTTAATCCTGGCCTTTTAGAGACA---ACAGAAGGCTGTAAACAAATAATAGGACAACTACAA---TCATCCCTTCAGACAGGATCAGAAGAGATTAAATCATTATATAACACAGTAGCAGTCCTCTATTATGTACAT---CAAAAGATACAAATAAGAGACACCAAGGAAGCTTTAGATAAGCTACAGGAAGAA---CAAGACAAATATCAG---------CAAAAAACACAACCAGCA------GCGGCTGATAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGGCAAATGGTACATCAGGCTATATCA [...]
+>Ref.F1.BE.93.VI850.AF077336
+ATGGGTGCGAGAGCGTCAATATTAAGCGGGGGAAAATTAGATGAATGGGAAAAAATTCAGTTAAGGCCGGGGGGAAAGAAAAGATATAAAATGAAACATCTAATATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTGATCCTGGCCTTCTAGAAACA---TCAGAAGGCTGTCAAAAAATAATAAGACAGCTACAACCA---TCCCTTCAGACAGGATCAGAAGAGCTTAAGTCATTATTTAATACAGTAGCAGTCCTCTATTATGTACAT---CAAAGGGCAGGGGTAACAGACACCAAGGAAGCTTTAGACAAGCTAGAGGAAGAA---CAAAACAAAAGTCAG---------CAAAAGACACAGCAAGCG------GCAGCTGACAAAGGG------------------------------GTCAGTCAAAATTACCCTATAGTACAGAATCTTCAG---GGACAAATGGTACACCAGTCTCTATCA [...]
+>Ref.B.NL.00.671_00T36.AY423387
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATAGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAGATATAAATTAAAACATATAGTATGGGCAAGCAGAGAGCTAGAACGATTCGCAGTTAATCCTGGCCTTTTAGAGACA---TCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCA---GCCCTTCAGACAGGATCAGAAGAACTTAAATCATTATTTAATACAGTAGCAACCCTCTATTGTGTGCAT---GCAAGGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAAAAAATAGAGGAAGAA---CAAAACAAAAGTAAGAAACGGGCACAGCAAGCACAGCAAGCA------GAAGCTGACGCAGGAAAAAACAAC------------------CCGGTCAGCCAGAATTACCCTATAGTGCAGAATCTCCAA---GGGCAAATGGTACATCAGGCCATATCA [...]
+>Ref.C.IN.95.95IN21068.AF067155
+ATGGGTGCGAGAGCGTCAATATTAAGAGGGGGAAAATTAGATAAATGGGAAAAAATTAGGTTAAGGCCAGGGGGAAAGAAACGCTATATGCTAAAACACCTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCAGTTAACCCTGGCCTTTTAGAGACA---GCAGAAGGCTGTAAACAAATAATAAAACAGCTACAACCA---GCTCTTCAGACAGGAACAGAGGAACTTAGATCATTATTCAACACAGTAGCAACTCTCTATTGTGTACAT---GCAGGGATAGAAGTACGAGACACCAAAGAAGCCTTAGACAAGATAGAAGAAGAA---CAAAACAAAATTAAG---------CAAAAAACACAGCAGGCA------AAAGAGGATGACGGG---------------------------AAGGTCAGTCAAAATTATCCTATAGTGCAGAATCTCCAA---GGGCAAATGGTACACCAAGCCATATCA [...]
+>Ref.B.US.98.1058_11.AY331295
+------------GCGTCAGTATTAAGCGGGGGAAAATTAGATACATGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTTGCACTTAACCCTGGCCTGTTAGAAACA---GCAGAAGGCTGTAGACAATTATTGGGACAGCTACAGCCA---TCCCTTCAAACAGGATCAGAAGAACTTAAATCATTATTTAATACAATAGCAACCCTCTATTGTGTACAT---CAAAGGATAGAGGTAAGAGACACCAAAGAGGCTTTAGACAAGATAGAGGAAGAG---CAAAACAAAAGTAAG---------AAAAAAGCACAGCAAGCAGCAGCTGCAGCTGACACAGGAAACAGCAGC------------------CAGGTCAGCCAAAATTACCCTATAGTGCAGAACCTCCAA---GGGCAAATGGTACATCAGGCCATATCA [...]
+>Ref.J.CD.97.J_97DC_KTB147.EF614151
+------------------------------------------GCATGGGAGAAAATTTGGCTGAGGCGACGGGGAGAGAAAAAATACAGGCGAAAACATATAGTATGGGCAAGCAGGGAGCTGGACAGATATGCACTTAACCCTGGCCTTCTA!!TAGT!GAGCAGAAGGCGGTGAACAGATACTAGTACAGATCCAACCA!GATCTTTA!!AACAGGAGCAGAGGAGATAAAATCATTATTTAACACAGAAGCAACCCTCTATTGTGTACAT!!T!AGAGGATAGACATAAGAGACACCAAGGAGGCTTTAGACAAGATAGAGGAA!CTTTA!!AAACAAAAGCAAG---------GAGAAAGCTAAGAAAGAA------GCTGTCAAAAAATACAACAGT---------------------CAGGTGAGTCACAATTATCCTATATTGCAAAATATG!!T!AAGGGGAACTAGTACACCAGGCCCTATCA [...]
+>Ref.J.CM.04.04CMU11421.GU237072
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGCGGAAAATTAGATACTTGGGAGAAAATTCGGTTGAGGCCAGGGGGGAAGAAACGTTATAGGCTAAAACATTTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTTAACCCTGRCCTTCTAGAAACA---TCAAAAGGCTGTCAACAAATATTAGTACAGCTCCAACCA---TCTTATCAAACAGGGACAGAAGAAATTAAGTCATTATATAACACAGTAGCAACCCTCTATTGCGTACAT---GAGGRSATAGAGGTAAAAGACACCAAGGAARCTTTAGACAAGATAGAGGAACTA---CAAAAGAAGAACAAG---------CAACAGGCACAGAAAGCA------GAARCTGACAAAAGRRACARCAGT------------------CAAGTCAGTCAAAATTATCCTATAGTGCAGAACATGCAA---GGGCAACCAGTACAYCARGCCCTATCA [...]
+>Ref.H.GB.00.00GBAC4001.FJ711703
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATGCTTGGGAGAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAGGCTAAAACATCTAGTATGGGCAAGCAGGGAGCTGGACAGATTTGCACTCAACCCCGACCTTTTAGAAACA---GCAGATGGCTGTCTAAAAATAYTAGGACAGATACAGCCA---GCTCTTCAGACAGGAACAGAAGAAATTAAATCGTTATTTAATCTAGTAGCAGTCCTCTATTGTGTACAT---CAGAAAATAGAGGTACAAGATACCAGTGAAGCTTTAAATAAGGTAAAGGAGATA---CAGAACAAGAACCAG---------CAAACAACACAGCAGGCA------ACAGCTRGTAAAGAGAAGGACAGC------------------AAGATCAGTCAAAATTATCCTATAGTACAGAATGCCCAA---GGGCAAATGGTACACCAGGCCATATCA [...]
+>Ref.G.NG.92.92NG083.U88826
+GTGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAAAATTAGATTCTTGGGAAAAAATTCGGTTAAGGCCAGGGGGAAGGAAAAAGTATAAACTAAAACATATAGTATGGGCAAGCAGGGAACTGGGGAGATTTGCACTTAACCGTGACCTTTTAGAAACA---GCAGAAGGTTGTGTGCAAATAATGAAACAGTTGCAACCA!GCTCTCTA!!GACAGGAACAGAGGAGCTTAGATCATTATTTAATACAGTAGCAACCCTCTACTGTGTACAT---CAAAAGATAGAGGTAAAAGACACCAAAGAAGCTCCAGAGGAAGTGGAAAAAATA---CAAAAGAACAGTCAG---------CAAGAAATACAGCAGGCA------GCAAAGAATGAAGGAAACAGTAAC------------------CCAGTCAGCCAAAATTATCCTATAGTGCAGAATGCACAA---GGGCAAATGATACATCAGGCCATATCA [...]
+>Ref.B.FR.83.HXB2_LAI_IIIB_BRU.K03455
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGAGAATTAGATCGATGGGAAAAAATTCGGTTAAGGCCAGGGGGAAAGAAAAAATATAAATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTGTTAGAAACA---TCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCA---TCCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAGTAGCAACCCTCTATTGTGTGCAT---CAAAGGATAGAGATAAAAGACACCAAGGAAGCTTTAGACAAGATAGAGGAAGAG---CAAAACAAAAGTAAG---------AAAAAAGCACAGCAAGCA------GCAGCTGACACAGGACACAGCAAT------------------CAGGTCAGCCAAAATTACCCTATAGTGCAGAACATCCAG---GGGCAAATGGTACATCAGGCCATATCA [...]
+>Ref.B.TH.90.BK132.AY173951
+ATGGGTGCGAGAGCGTCAGTATTAAGCGGGGGACAATTAGATAGATGGGAGAAAATTCGGTTACGGCCAGGGGGAAAGAAAAAATATAGATTAAAACATATAGTATGGGCAAGCAGGGAGCTAGAACGATTCGCAGTTAATCCTGGCCTATTGGAAACA---TCAGAAGGCTGTAGACAAATACTGGGACAGCTACAACCA---AGCCTTCAGACAGGATCAGAAGAACTTAGATCATTATATAATACAATAGCAGTCCTCTATTGTGTACAT---CAAAAGATAGAGGTAAAAGACACCAAGGAAGCTTTAGAGAAGATAGAGGAAGAA---CAAAACAAAAGTAAG---------AAAAAGGCACAGCAAGCA------GCAGCTAACACAGAAAACAGCAGC------------------CAGGTTAGCCAAAATTACCCTATAGTGCAAAATATGCAG---GGGCAAATGGTACATCAGGCCATATCA [...]
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
index 431ae3c..8421ebc 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
@@ -99,7 +99,7 @@ optimization.ignore_parameter =
optimization.max_number_f_eval = 10000
# Precision to reach:
-optimization.tolerance = 1 //0.000001
+optimization.tolerance = 0.000001
# idem for error or warning messages:
optimization.message_handler = $(DATA).messages
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
similarity index 91%
copy from Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
copy to Examples/MaximumLikelihood/Codons/M0/ML.bpp
index 431ae3c..1b0561a 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M0/ML.bpp
@@ -42,16 +42,8 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = YN98(kappa=1, omega=1, frequencies=F1X4)
-nonhomogeneous=one_per_branch
-#These lines are for the F1X4 option:
-#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
-# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
-#These lines are for the F3X4 option:
-nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.freq*
-nonhomogeneous.stationarity=yes
-#Only if stationarity is set to false:
-nonhomogeneous.root_freq=
+model = YN98(kappa=1, omega=1.0, frequencies=F0)
+nonhomogeneous=no
rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
@@ -99,7 +91,7 @@ optimization.ignore_parameter =
optimization.max_number_f_eval = 10000
# Precision to reach:
-optimization.tolerance = 1 //0.000001
+optimization.tolerance = 0.000001
# idem for error or warning messages:
optimization.message_handler = $(DATA).messages
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/M1/ML.bpp
similarity index 89%
copy from Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
copy to Examples/MaximumLikelihood/Codons/M1/ML.bpp
index 431ae3c..e60badd 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M1/ML.bpp
@@ -1,6 +1,6 @@
#Example data set adapted from PAML
#
-#logL = 65442.80353609310986939818
+#Initial log likelihood.................: -1069.68587126864
# Global variables:
DATA = lysozymeLarge
@@ -42,16 +42,8 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = YN98(kappa=1, omega=1, frequencies=F1X4)
-nonhomogeneous=one_per_branch
-#These lines are for the F1X4 option:
-#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
-# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
-#These lines are for the F3X4 option:
-nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.freq*
-nonhomogeneous.stationarity=yes
-#Only if stationarity is set to false:
-nonhomogeneous.root_freq=
+model = YNGKP_M1(kappa=1, omega=1.0, frequencies=F3X4, p0=1.0, initFreqs=observed)
+nonhomogeneous=no
rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
@@ -79,7 +71,7 @@ likelihood.recursion_simple.compression = recursive
# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommanded)
-optimization = FullD(derivatives=Newton)
+optimization = D-Brent(derivatives=Newton, nstep=10)
# Tell if the parameter should be transformed in order to remove constraints.
# This can improves the optimization, but might be a bit slower.
@@ -93,13 +85,13 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameter = YNGKP_M1.freq*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
# Precision to reach:
-optimization.tolerance = 1 //0.000001
+optimization.tolerance = 0.000001
# idem for error or warning messages:
optimization.message_handler = $(DATA).messages
diff --git a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp b/Examples/MaximumLikelihood/Codons/M2/ML.bpp
similarity index 89%
copy from Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
copy to Examples/MaximumLikelihood/Codons/M2/ML.bpp
index 431ae3c..8ffd713 100644
--- a/Examples/MaximumLikelihood/Codons/BranchModel/ML.bpp
+++ b/Examples/MaximumLikelihood/Codons/M2/ML.bpp
@@ -1,6 +1,6 @@
#Example data set adapted from PAML
#
-#logL = 65442.80353609310986939818
+#Initial log likelihood.................: -1068.04967173634
# Global variables:
DATA = lysozymeLarge
@@ -42,16 +42,8 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = YN98(kappa=1, omega=1, frequencies=F1X4)
-nonhomogeneous=one_per_branch
-#These lines are for the F1X4 option:
-#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
-# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
-#These lines are for the F3X4 option:
-nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.freq*
-nonhomogeneous.stationarity=yes
-#Only if stationarity is set to false:
-nonhomogeneous.root_freq=
+model = YNGKP_M2(kappa=1, omega0=0.5, omega2=2.0, frequencies=F3X4, theta1=0.33333, theta2=0.5, initFreqs=observed)
+nonhomogeneous=no
rate_distribution = Uniform //Gamma(n=4, alpha=0.358)
@@ -79,7 +71,7 @@ likelihood.recursion_simple.compression = recursive
# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommanded)
-optimization = FullD(derivatives=Newton)
+optimization = D-Brent(derivatives=Newton, nstep=10)
# Tell if the parameter should be transformed in order to remove constraints.
# This can improves the optimization, but might be a bit slower.
@@ -93,13 +85,13 @@ optimization.final = none
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies)
-optimization.ignore_parameter =
+optimization.ignore_parameter = YNGKP_M2.freq*
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
# Precision to reach:
-optimization.tolerance = 1 //0.000001
+optimization.tolerance = 0.000001
# idem for error or warning messages:
optimization.message_handler = $(DATA).messages
diff --git a/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp b/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
index 773d677..2c47a19 100644
--- a/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/Homogeneous/ML.bpp
@@ -1,5 +1,5 @@
-#initial logL: -65639.1410465442
-#final logL: -65442.8035360931
+#initial logL: -65826.8571771722
+#final logL: -65428.449112789
# Global variables:
DATA = LSU
@@ -39,7 +39,7 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = HKY85(kappa=2.843, theta=0.5, theta1=0.5, theta2=0.5, useObservedFrequencies=yes)
+model = HKY85(kappa=2.843, initFreqs=observed)
rate_distribution = Gamma(n=4, alpha=0.358)
diff --git a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
index d87f906..cc9b094 100644
--- a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGG/MLNHGG.bpp
@@ -1,11 +1,11 @@
#Initial log likelihood.................: -65639.1410465442
-#Log likelihood.........................: -64547.3493038365
+#Log likelihood.........................: -64547.3486924172
#GC.theta...............................: 0.486755
#T92.theta_1............................: 0.438231
-#T92.theta_2............................: 0.778319
+#T92.theta_2............................: 0.778332
#etc
-#Gamma.alpha............................: 0.43608
-#1566.56user 0.36system 26:13.49elapsed 99%CPU
+#Gamma.alpha............................: 0.436095
+#2621.12user 0.63system 43:47.81elapsed 99%CPU
# Global variables:
diff --git a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
index dd27c38..53c045b 100644
--- a/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
+++ b/Examples/MaximumLikelihood/Nucleotides/NonHomogeneousGeneral/MLNH.bpp
@@ -1,11 +1,20 @@
-#Initial log likelihood.................: -71011.8177084265
-#Log likelihood.........................: -65343.8442233959
-#GC.theta...............................: 0.483464
-#T92.kappa_1............................: 2.63716
-#T92.theta_1............................: 0.576654
-#T92.theta_2............................: 0.445837
-#Gamma.alpha............................: 0.439279
-#111.83user 0.07system 1:52.90elapsed 99%CPU
+#Initial log likelihood.................: -67066.6981049241
+#Log likelihood.........................: -65187.3822910926
+#Full.theta.............................: 0.481587
+#Full.theta1............................: 0.568231
+#Full.theta2............................: 0.591547
+#GTR.a_1................................: 1.27372
+#GTR.b_1................................: 0.667001
+#GTR.c_1................................: 0.344267
+#GTR.d_1................................: 0.339924
+#GTR.e_1................................: 0.37557
+#GTR.theta_1............................: 0.622987
+#GTR.theta1_1...........................: 0.469133
+#GTR.theta2_1...........................: 0.547965
+#GTR.theta_2............................: 0.491549
+#GTR.theta1_2...........................: 0.485613
+#GTR.theta2_2...........................: 0.555095
+#Gamma.alpha............................: 0.438647
# Global variables:
@@ -60,7 +69,7 @@ rate_distribution = Gamma(n=4, alpha=0.358)
nonhomogeneous.number_of_models = 2
# Set up each model:
-model1 = GTR(theta=0.5, useObservedFreqs=yes)
+model1 = GTR(initFreqs=observed)
model1.nodes_id=0:62 # The Ids of the nodes to which this model should be assigned.
model2 = GTR(a=model1.GTR.a,\
@@ -68,7 +77,7 @@ model2 = GTR(a=model1.GTR.a,\
c=model1.GTR.c,\
d=model1.GTR.d,\
e=model1.GTR.e,\
- theta=0.9, useObservedFreqs=yes)
+ initFreqs=observed)
model2.nodes_id=63:154
# Likelihood recursion option:
@@ -93,7 +102,7 @@ likelihood.recursion_simple.compression = recursive
optimization=FullD(derivatives=Newton)
optimization.reparametrization=no
optimization.verbose = 1
-optimization.ignore_parameter = RootPosition
+optimization.ignore_parameter =
optimization.max_number_f_eval = 10000
optimization.tolerance = 0.000001
optimization.message_handler = $(DATA).messages
diff --git a/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp b/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
index a392526..369b74a 100644
--- a/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
+++ b/Examples/MaximumLikelihood/Proteins/Homogeneous/ML.bpp
@@ -1,5 +1,5 @@
-#initial logL: -5140.62864568291
-#final logL:
+#initial logL: -5140.62856257665
+#final logL: -4958.52926533168
# Global variables:
DATA = Myo
@@ -39,7 +39,7 @@ init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
-model = JTT92+F(useObservedFreqs=yes)
+model = JTT92+F(initFreqs=observed, initFreqs.observedPseudoCount=0.)
rate_distribution = Gamma(n=4, alpha=0.5)
diff --git a/Examples/README b/Examples/README
index 3844099..b6765a4 100644
--- a/Examples/README
+++ b/Examples/README
@@ -8,6 +8,10 @@ Data/
treeList.dnd A list of trees in newick format.
treeList2.dnd Another list of trees in newick format.
OutGroup.txt A list of taxons to be used with BppReRoot.
+ lysozymeLarge.fasta and .dnd Files from the PAML package.
+ Myo.mase Myoglobin sequence alignment, from Dutheil and Galtier 2007
+ HIV1_REF_2010_gag_DNA.fasta Alignment file downloaded from the HIV database on 21/12/11
+ " " _macse realigned with Macse 0.8
..:: Options files ::..
@@ -25,4 +29,5 @@ These option files are split in their respective program directory:
SeqMan.bpp BppSeqMan option file.
PhySamp.bpp BppPhyloSampler option file.
ReRoot.bpp BppReRoot option file.
+ AlnScores.bpp BppAlnscore option file.
diff --git a/Examples/SequenceManipulation/SeqMan2.bpp b/Examples/SequenceManipulation/SeqMan2.bpp
new file mode 100644
index 0000000..2db2e3e
--- /dev/null
+++ b/Examples/SequenceManipulation/SeqMan2.bpp
@@ -0,0 +1,33 @@
+# Global variables:
+DATA = HIV1_REF_2010_gag
+
+# ---------------
+# Input sequences
+# ---------------
+
+# The alphabet to use:
+# [DNA|RNA|Protein|Codon(...)|Generic]
+alphabet=Codon(code=Standard, letter=DNA)
+
+# The sequence file to use (sequences must be aligned!)
+input.sequence.file=../Data/$(DATA)_DNA.fasta
+
+# The file format:
+input.sequence.format=Fasta
+
+# ----------------
+# Output sequences
+# ----------------
+
+output.sequence.file=$(DATA)_AA.fasta
+
+# The file format:
+output.sequence.format=Fasta()
+
+# -----------------------
+# Sequences manipulations
+# -----------------------
+#For example (see manual for a detailed list of available options):
+sequence.manip = CoerceToAlignment,RemoveStops,KeepComplete(maxGapAllowed=30%),Translate(code=StandardGeneticCode)
+
+
diff --git a/Examples/SequenceSimulation/SeqGen.bpp b/Examples/SequenceSimulation/SeqGen.bpp
index 56bdc2b..b05133d 100644
--- a/Examples/SequenceSimulation/SeqGen.bpp
+++ b/Examples/SequenceSimulation/SeqGen.bpp
@@ -4,7 +4,7 @@ alphabet = DNA
# Input tree to use:
input.tree.file = ../Data/LSUrooted.dnd
-input.tree.Format=Newick
+input.tree.format=Newick
# Print a tree with ids as bootstrap values.
# This is helpful when setting up complexe non-homogeneous models.
diff --git a/bppSuite.spec b/bppSuite.spec
index 2287203..85723b7 100644
--- a/bppSuite.spec
+++ b/bppSuite.spec
@@ -1,5 +1,5 @@
%define name bppsuite
-%define version 0.6.1
+%define version 0.7.0
%define release 1
%define _prefix /usr
@@ -29,6 +29,7 @@ Bio++ program suite includes programs:
- BppPhySamp for phylogenetic sampling,
- BppReRoot for tree rerooting.
- BppTreeDraw for tree drawing.
+ - BppAlnScore for comparing alignments and computing alignment scores.
%prep
%setup -q
@@ -80,6 +81,10 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/share/man/man1/bpptreedraw.1.gz
%changelog
+* Wed Feb 15 2012 Julien Dutheil <julien.dutheil at univ-montp2.fr>
+- BppSuite 0.7.0 release
+* Thu Jun 09 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
+- BppSuite 0.6.2 release
* Mon Feb 28 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
- BppSuite 0.6.1 release
* Mon Feb 07 2011 Julien Dutheil <julien.dutheil at univ-montp2.fr>
diff --git a/bppSuite/CMakeLists.txt b/bppSuite/CMakeLists.txt
index 77f24d6..594751a 100644
--- a/bppSuite/CMakeLists.txt
+++ b/bppSuite/CMakeLists.txt
@@ -42,6 +42,10 @@ ADD_EXECUTABLE(bpptreedraw bppTreeDraw.cpp)
TARGET_LINK_LIBRARIES(bpptreedraw ${LIBS})
SET_TARGET_PROPERTIES(bpptreedraw PROPERTIES LINK_SEARCH_END_STATIC ${BUILD_STATIC})
+ADD_EXECUTABLE(bppalnscore bppAlnScore.cpp)
+TARGET_LINK_LIBRARIES(bppalnscore ${LIBS})
+SET_TARGET_PROPERTIES(bppalnscore PROPERTIES LINK_SEARCH_END_STATIC ${BUILD_STATIC})
+
# Install progs
INSTALL(TARGETS
bppml
@@ -54,4 +58,5 @@ INSTALL(TARGETS
bppreroot
bppphysamp
bpptreedraw
+ bppalnscore
DESTINATION bin)
diff --git a/bppSuite/bppAlnScore.cpp b/bppSuite/bppAlnScore.cpp
new file mode 100644
index 0000000..7feb507
--- /dev/null
+++ b/bppSuite/bppAlnScore.cpp
@@ -0,0 +1,226 @@
+//
+// File: bppAlnScore.cpp
+// Created by: Julien Dutheil
+// Created on: Dec Thu 15 16:16 2011
+//
+
+/*
+Copyright or © or Copr. Bio++ Development Team
+
+This software is a computer program whose purpose is to compare sequence
+alignments and compute alignment scores.
+
+This software is governed by the CeCILL license under French law and
+abiding by the rules of distribution of free software. You can use,
+modify and/ or redistribute the software under the terms of the CeCILL
+license as circulated by CEA, CNRS and INRIA at the following URL
+"http://www.cecill.info".
+
+As a counterpart to the access to the source code and rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors have only limited
+liability.
+
+In this respect, the user's attention is drawn to the risks associated
+with loading, using, modifying and/or developing or reproducing the
+software by the user in light of its specific status of free software,
+that may mean that it is complicated to manipulate, and that also
+therefore means that it is reserved for developers and experienced
+professionals having in-depth computer knowledge. Users are therefore
+encouraged to load and test the software's suitability as regards their
+requirements in conditions enabling the security of their systems and/or
+data to be ensured and, more generally, to use and operate it in the
+same conditions as regards security.
+
+The fact that you are presently reading this means that you have had
+knowledge of the CeCILL license and that you accept its terms.
+*/
+
+// From the STL:
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+
+using namespace std;
+
+#include <Bpp/App/BppApplication.h>
+#include <Bpp/App/ApplicationTools.h>
+#include <Bpp/Text/TextTools.h>
+
+// From SeqLib:
+#include <Bpp/Seq/SiteTools.h>
+#include <Bpp/Seq/Alphabet/Alphabet.h>
+#include <Bpp/Seq/App/SequenceApplicationTools.h>
+#include <Bpp/Seq/Io.all>
+#include <Bpp/Seq/Container.all>
+#include <Bpp/Seq/SequenceTools.h>
+
+using namespace bpp;
+
+void help() // Prints the bppalnscore usage banner to ApplicationTools::message.
+{
+ (*ApplicationTools::message << "__________________________________________________________________________").endLine();
+ (*ApplicationTools::message << "bppalnscore parameter1_name=parameter1_value").endLine();
+ (*ApplicationTools::message << " parameter2_name=parameter2_value ... param=option_file").endLine();
+ (*ApplicationTools::message).endLine(); // Empty separator line.
+ (*ApplicationTools::message << " Refer to the Bio++ Program Suite Manual for a list of available options.").endLine();
+ (*ApplicationTools::message << "__________________________________________________________________________").endLine();
+}
+
+int main(int args, char** argv)
+{
+ cout << "******************************************************************" << endl;
+ cout << "* Bio++ Alignment Score, version 0.1 *" << endl;
+ cout << "* Author: J. Dutheil Last Modif. 15/12/11 *" << endl;
+ cout << "******************************************************************" << endl;
+ cout << endl;
+ // Without any argument, only print the usage message and exit successfully.
+ if (args == 1)
+ {
+ help();
+ return 0;
+ }
+
+ try {
+ // Pipeline: read both alignments, score each test site against the reference, then write the requested outputs.
+ BppApplication bppalnscore(args, argv, "BppAlnScore");
+ bppalnscore.startTimer();
+
+ // Get the alphabet used to read both alignments:
+ Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppalnscore.getParams(), "", false, true, true);
+
+ // Get the test alignment (parameters with the ".test" suffix):
+ auto_ptr<SiteContainer> sitesTest(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".test", false, true));
+
+ // Get the reference alignment (parameters with the ".ref" suffix):
+ auto_ptr<SiteContainer> sitesRef(SequenceApplicationTools::getSiteContainer(alphabet, bppalnscore.getParams(), ".ref", false, true));
+
+ // If sequence names differ (content or order), rebuild the reference in the order of the test alignment:
+ vector<string> namesTest = sitesTest->getSequencesNames();
+ vector<string> namesRef = sitesRef->getSequencesNames();
+ if (namesTest != namesRef) {
+ ApplicationTools::displayTask("Reorder sequences in ref. alignment", true);
+ auto_ptr<AlignedSequenceContainer> tmp(new AlignedSequenceContainer(sitesRef->getAlphabet()));
+ for (size_t i = 0; i < namesTest.size(); ++i) {
+ ApplicationTools::displayGauge(i, namesTest.size() - 1);
+ try {
+ tmp->addSequence(sitesRef->getSequence(namesTest[i]));
+ } catch(SequenceNotFoundException& ex) {
+ throw Exception("ERROR!!! Reference alignment should contain the same sequences as the test alignment!");
+ }
+ }
+ ApplicationTools::displayTaskDone();
+ sitesRef = tmp;
+ }
+
+ //Build alignment indexes:
+ RowMatrix<unsigned int> indexTest, indexRef;
+ SiteContainerTools::getSequencePositions(*sitesTest, indexTest);
+ SiteContainerTools::getSequencePositions(*sitesRef, indexRef);
+
+ // Compute the per-site column scores (cs) and sum-of-pairs scores (sps); score.na is forwarded to both:
+ int na = ApplicationTools::getIntParameter("score.na", bppalnscore.getParams(), 0);
+ ApplicationTools::displayResult("NA value to used", na);
+ vector<int> cs = SiteContainerTools::getColumnScores(indexTest, indexRef, na);
+ vector<double> sps = SiteContainerTools::getSumOfPairsScores(indexTest, indexRef, static_cast<double>(na));
+
+ // Should scores be made uniform within words? Every site of a word then receives the minimum score of that word.
+ size_t wsize = ApplicationTools::getParameter<size_t>("score.word_size", bppalnscore.getParams(), 1);
+ size_t phase = 0;
+ if (wsize > 1) {
+ ApplicationTools::displayResult("Scores uniformized for words of size", wsize);
+ string phaseOpt = ApplicationTools::getStringParameter("score.phase", bppalnscore.getParams(), "1");
+ if (TextTools::isDecimalInteger(phaseOpt)) {
+ phase = TextTools::toInt(phaseOpt);
+ if (phase == 0)
+ throw Exception("ERROR: positions are 1-based.");
+ phase--;
+ } else {
+ //We look for the first occurrence of the given motif:
+ try {
+ BasicSequence motif("motif", phaseOpt, sitesTest->getAlphabet());
+ ApplicationTools::displayResult("Phase based on 1st occurence of", motif.toString());
+ unsigned int pos = sitesTest->getNumberOfSites();
+ for (unsigned int i = 0; i < sitesTest->getNumberOfSequences(); ++i) {
+ unsigned int p = SequenceTools::findFirstOf(sitesTest->getSequence(i), motif);
+ if (p < pos)
+ pos = p;
+ }
+ phase = pos;
+ } catch(Exception& ex) {
+ throw Exception("Error, unvalid motif specified for phase option.");
+ }
+ }
+ if (phase > cs.size()) throw Exception("ERROR: phase is beyond the end of the alignment."); // Guards the score vectors against out-of-range writes below (also catches a negative phase wrapped into size_t).
+ ApplicationTools::displayResult("First word starts at", phase + 1);
+ // Sites located before the first word get a null score:
+ size_t i;
+ for (i = 0; i < phase; ++i) {
+ cs[i] = 0;
+ sps[i] = 0;
+ }
+ for (; i + wsize <= cs.size(); i += wsize) {
+ //First compute minimum criterion:
+ int csmin = 1;
+ double spsmin = 1;
+ for (size_t j = i; j < i + wsize; ++j) {
+ if (cs[j] < csmin) csmin = cs[j];
+ if (sps[j] < spsmin) spsmin = sps[j];
+ }
+ //Assign min to all positions in word:
+ for (size_t j = i; j < i + wsize; ++j) {
+ cs[j] = csmin;
+ sps[j] = spsmin;
+ }
+ }
+ for (; i < cs.size(); ++i) { // Trailing sites that do not fill a complete word also get a null score.
+ cs[i] = 0;
+ sps[i] = 0;
+ }
+ }
+
+ // Output scores to a tab-separated file, one line per site, unless output.scores is "none":
+ string outputScores = ApplicationTools::getAFilePath("output.scores", bppalnscore.getParams(), false, false);
+ if (outputScores != "none") {
+ ApplicationTools::displayResult("Output scores to", outputScores);
+ ofstream output(outputScores.c_str(), ios::out);
+ output << "Site\tColumnScore\tSumOfPairsScore" << endl;
+ for (size_t i = 0; i < cs.size(); ++i) {
+ output << sitesTest->getSite(i).getPosition() << "\t" << cs[i] << "\t" << sps[i] << endl;
+ }
+ output.close();
+ }
+
+ // Write the test alignment with two site selections: "CS" (column score equal to 1) and "SPS" (sum-of-pairs score >= threshold):
+ string outputFilter = ApplicationTools::getAFilePath("output.mase", bppalnscore.getParams(), false, false);
+ if (outputFilter != "none") {
+ ApplicationTools::displayResult("Output mase with site filter to", outputFilter);
+ double spsThreshold = ApplicationTools::getDoubleParameter("output.sps_thresholds", bppalnscore.getParams(), 0.8);
+ ApplicationTools::displayResult("Threshold for SPS", spsThreshold);
+
+ MultiRange<unsigned int> csRanges;
+ MultiRange<unsigned int> spsRanges;
+ for (size_t i = 0; i < cs.size(); ++i) {
+ if (cs[i] == 1) csRanges.addRange(Range<unsigned int>(i, i + 1));
+ if (sps[i] >= spsThreshold) spsRanges.addRange(Range<unsigned int>(i, i + 1));
+ }
+
+ MaseHeader header;
+ header.setSiteSelection("CS", csRanges);
+ header.setSiteSelection("SPS", spsRanges);
+ Mase writer;
+ writer.writeMeta(outputFilter, *sitesTest, header);
+ }
+
+ //We're done!
+ bppalnscore.done();
+
+ } catch(exception & e) { // Any error is reported on standard output and turned into exit code 1.
+ cout << e.what() << endl;
+ return 1;
+ }
+
+ return 0;
+}
+
diff --git a/bppSuite/bppAncestor.cpp b/bppSuite/bppAncestor.cpp
index f2b3687..4d0f723 100644
--- a/bppSuite/bppAncestor.cpp
+++ b/bppSuite/bppAncestor.cpp
@@ -90,9 +90,9 @@ void help()
int main(int args, char ** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Ancestral Sequence Reconstruction, version 0.4.0 *" << endl;
+ cout << "* Bio++ Ancestral Sequence Reconstruction, version 0.5.0 *" << endl;
cout << "* Authors: J. Dutheil Created on: 10/09/08 *" << endl;
- cout << "* B. Boussau Last Modif: 09/11/10 *" << endl;
+ cout << "* B. Boussau Last Modif: 17/06/11 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -271,109 +271,158 @@ int main(int args, char ** argv)
AncestralStateReconstruction *asr = 0;
bool probMethod = false;
- if (reconstruction == "marginal")
+ if (reconstruction == "none")
{
+ //do nothing
+ } else if (reconstruction == "marginal") {
asr = new MarginalAncestralStateReconstruction(tl);
probMethod = true;
- }
- else
+ } else
throw Exception("Unknown ancestral state reconstruction method: " + reconstruction);
- if (probMethod)
- {
- probs = ApplicationTools::getBooleanParameter("asr.probabilities", bppancestor.getParams(), false, "", true, false);
- ApplicationTools::displayResult("Output probabilities", probs ? "yes" : "no");
- }
+ string outputFile;
+ if (asr) {
+ if (probMethod)
+ {
+ probs = ApplicationTools::getBooleanParameter("asr.probabilities", bppancestor.getParams(), false, "", true, false);
+ ApplicationTools::displayResult("Output probabilities", probs ? "yes" : "no");
+ }
- // Write infos to file:
- string outputFile = ApplicationTools::getAFilePath("output.sites.file", bppancestor.getParams(), false, false);
- if (outputFile != "none")
- {
- ApplicationTools::displayResult("Output file for sites", outputFile);
- ofstream out(outputFile.c_str(), ios::out);
- TreeTemplate<Node> ttree(*tree);
- vector<Node *> nodes = ttree.getInnerNodes();
- unsigned int nbNodes = nodes.size();
+ // Write infos to file:
+ outputFile = ApplicationTools::getAFilePath("output.sites.file", bppancestor.getParams(), false, false);
+ if (outputFile != "none")
+ {
+ ApplicationTools::displayResult("Output file for sites", outputFile);
+ ofstream out(outputFile.c_str(), ios::out);
+ TreeTemplate<Node> ttree(*tree);
+ vector<Node *> nodes = ttree.getInnerNodes();
+ unsigned int nbNodes = nodes.size();
- // Get the rate class with maximum posterior probability:
- vector<unsigned int> classes = tl->getRateClassWithMaxPostProbOfEachSite();
- // Get the posterior rate, i.e. rate averaged over all posterior probabilities:
- Vdouble rates = tl->getPosteriorRateOfEachSite();
- // Get the ancestral sequences:
- vector<Sequence*> sequences(nbNodes);
- vector<VVdouble*> probabilities(nbNodes);
+ // Get the rate class with maximum posterior probability:
+ vector<unsigned int> classes = tl->getRateClassWithMaxPostProbOfEachSite();
+ // Get the posterior rate, i.e. rate averaged over all posterior probabilities:
+ Vdouble rates = tl->getPosteriorRateOfEachSite();
+ // Get the ancestral sequences:
+ vector<Sequence*> sequences(nbNodes);
+ vector<VVdouble*> probabilities(nbNodes);
+
+ vector<string> colNames;
+ colNames.push_back("Sites");
+ colNames.push_back("is.complete");
+ colNames.push_back("is.constant");
+ colNames.push_back("lnL");
+ colNames.push_back("rc");
+ colNames.push_back("pr");
+ for (unsigned int i = 0; i < nbNodes; i++) {
+ Node *node = nodes[i];
+ colNames.push_back("max." + TextTools::toString(node->getId()));
+ if (probs) {
+ probabilities[i] = new VVdouble();
+ //The cast will have to be updated when more probabilistic method will be available:
+ sequences[i] = dynamic_cast<MarginalAncestralStateReconstruction *>(asr)->getAncestralSequenceForNode(node->getId(), probabilities[i], false);
- vector<string> colNames;
- colNames.push_back("Sites");
- colNames.push_back("is.complete");
- colNames.push_back("is.constant");
- colNames.push_back("lnL");
- colNames.push_back("rc");
- colNames.push_back("pr");
- for (unsigned int i = 0; i < nbNodes; i++) {
- Node *node = nodes[i];
- colNames.push_back("max." + TextTools::toString(node->getId()));
- if (probs) {
- probabilities[i] = new VVdouble();
- //The cast will have to be updated when more probabilistic method will be available:
- sequences[i] = dynamic_cast<MarginalAncestralStateReconstruction *>(asr)->getAncestralSequenceForNode(node->getId(), probabilities[i], false);
-
- for (unsigned int j = 0; j < nbStates; j++) {
- colNames.push_back("prob." + TextTools::toString(node->getId()) + "." + alphabet->intToChar((int)j));
+ for (unsigned int j = 0; j < nbStates; j++) {
+ colNames.push_back("prob." + TextTools::toString(node->getId()) + "." + alphabet->intToChar((int)j));
+ }
}
- }
- else
- {
- if (node->isLeaf()) {
+ else
+ {
+ if (node->isLeaf()) {
- } else {
- sequences[i] = asr->getAncestralSequenceForNode(node->getId());
+ } else {
+ sequences[i] = asr->getAncestralSequenceForNode(node->getId());
+ }
}
}
- }
- //Now fill the table:
- vector<string> row(colNames.size());
- DataTable* infos = new DataTable(colNames);
+ //Now fill the table:
+ vector<string> row(colNames.size());
+ DataTable* infos = new DataTable(colNames);
- for (unsigned int i = 0; i < sites->getNumberOfSites(); i++)
- {
- double lnL = tl->getLogLikelihoodForASite(i);
- const Site* currentSite = &sites->getSite(i);
- int currentSitePosition = currentSite->getPosition();
- string isCompl = "NA";
- string isConst = "NA";
- try { isCompl = (SiteTools::isComplete(*currentSite) ? "1" : "0"); }
- catch(EmptySiteException& ex) {}
- try { isConst = (SiteTools::isConstant(*currentSite) ? "1" : "0"); }
- catch(EmptySiteException& ex) {}
- row[0] = (string("[" + TextTools::toString(currentSitePosition) + "]"));
- row[1] = isCompl;
- row[2] = isConst;
- row[3] = TextTools::toString(lnL);
- row[4] = TextTools::toString(classes[i]);
- row[5] = TextTools::toString(rates[i]);
-
- unsigned int k = 6;
- for (unsigned int j = 0; j < nbNodes; j++) {
- row[k] = sequences[j]->getChar(i);
- k++;
- if (probs) {
- for (unsigned int l = 0; l < nbStates; l++) {
- row[k] = TextTools::toString((*probabilities[j])[i][l]);
- k++;
+ for (unsigned int i = 0; i < sites->getNumberOfSites(); i++)
+ {
+ double lnL = tl->getLogLikelihoodForASite(i);
+ const Site* currentSite = &sites->getSite(i);
+ int currentSitePosition = currentSite->getPosition();
+ string isCompl = "NA";
+ string isConst = "NA";
+ try { isCompl = (SiteTools::isComplete(*currentSite) ? "1" : "0"); }
+ catch(EmptySiteException& ex) {}
+ try { isConst = (SiteTools::isConstant(*currentSite) ? "1" : "0"); }
+ catch(EmptySiteException& ex) {}
+ row[0] = (string("[" + TextTools::toString(currentSitePosition) + "]"));
+ row[1] = isCompl;
+ row[2] = isConst;
+ row[3] = TextTools::toString(lnL);
+ row[4] = TextTools::toString(classes[i]);
+ row[5] = TextTools::toString(rates[i]);
+
+ unsigned int k = 6;
+ for (unsigned int j = 0; j < nbNodes; j++) {
+ row[k] = sequences[j]->getChar(i);
+ k++;
+ if (probs) {
+ for (unsigned int l = 0; l < nbStates; l++) {
+ row[k] = TextTools::toString((*probabilities[j])[i][l]);
+ k++;
+ }
}
}
+
+ infos->addRow(row);
}
- infos->addRow(row);
+ DataTable::write(*infos, out, "\t");
+
+ delete infos;
}
- DataTable::write(*infos, out, "\t");
+ SiteContainer* asSites = 0;
+ if (probMethod)
+ {
+ bool sample = ApplicationTools::getBooleanParameter("asr.sample", bppancestor.getParams(), false, "", true, false);
+ ApplicationTools::displayResult("Sample from posterior distribution", sample ? "yes" : "no");
+ if (sample)
+ {
+ unsigned int nbSamples = ApplicationTools::getParameter<unsigned int>("asr.sample.number", bppancestor.getParams(), 1, "", true, false);
+ asSites = new AlignedSequenceContainer(alphabet);
+ for (unsigned int i = 0; i < nbSamples; i++)
+ {
+ ApplicationTools::displayGauge(i, nbSamples-1, '=');
+ SequenceContainer *sampleSites = dynamic_cast<MarginalAncestralStateReconstruction *>(asr)->getAncestralSequences(true);
+ vector<string> names = sampleSites->getSequencesNames();
+ for (unsigned int j = 0; j < names.size(); j++)
+ names[j] += "_" + TextTools::toString(i+1);
+ sampleSites->setSequencesNames(names, false);
+ SequenceContainerTools::append(*asSites, *sampleSites);
+ delete sampleSites;
+ }
+ ApplicationTools::message->endLine();
+ }
+ else
+ {
+ asSites = asr->getAncestralSequences();
+ }
+ }
+ else
+ {
+ asSites = asr->getAncestralSequences();
+ }
+
+ //Add existing sequence to output?
+ bool addExtant = ApplicationTools::getBooleanParameter("asr.add_extant", bppancestor.getParams(), false, "", true, false);
+ if (addExtant) {
+ SequenceContainerTools::append(*asSites, *sites);
+ }
- delete infos;
+ //Write output:
+ if (ApplicationTools::getStringParameter("output.sequence.file", bppancestor.getParams(), "none") != "none") {
+ SequenceApplicationTools::writeAlignmentFile(*asSites, bppancestor.getParams());
+ }
+ delete asSites;
+
+ delete asr;
}
-
outputFile = ApplicationTools::getAFilePath("output.nodes.file", bppancestor.getParams(), false, false);
if (outputFile != "none")
@@ -381,15 +430,18 @@ int main(int args, char ** argv)
ApplicationTools::displayResult("Output file for nodes", outputFile);
ofstream out(outputFile.c_str(), ios::out);
+ //Add existing sequence to output?
+ bool addExtant = ApplicationTools::getBooleanParameter("output.nodes.add_extant", bppancestor.getParams(), false, "", true, false);
+
map<int, vector<double> > frequencies;
- TreeLikelihoodTools::getAncestralFrequencies(*tl, frequencies, false);
+ TreeLikelihoodTools::getAncestralFrequencies(*tl, frequencies, addExtant);
vector<string> colNames;
colNames.push_back("Nodes");
for (unsigned int i = 0; i < tl->getNumberOfStates(); i++)
- colNames.push_back("exp" + TextTools::toString(i));
+ colNames.push_back("exp" + alphabet->intToChar(i));
for (unsigned int i = 0; i < tl->getNumberOfStates(); i++)
- colNames.push_back("eb" + TextTools::toString(i));
+ colNames.push_back("eb" + alphabet->intToChar(i));
//Now fill the table:
vector<string> row(colNames.size());
@@ -416,50 +468,6 @@ int main(int args, char ** argv)
}
-
- SiteContainer* asSites = 0;
- if (probMethod)
- {
- bool sample = ApplicationTools::getBooleanParameter("asr.sample", bppancestor.getParams(), false, "", true, false);
- ApplicationTools::displayResult("Sample from posterior distribution", sample ? "yes" : "no");
- if (sample)
- {
- unsigned int nbSamples = ApplicationTools::getParameter<unsigned int>("asr.sample.number", bppancestor.getParams(), 1, "", true, false);
- asSites = new AlignedSequenceContainer(alphabet);
- for (unsigned int i = 0; i < nbSamples; i++)
- {
- ApplicationTools::displayGauge(i, nbSamples-1, '=');
- SequenceContainer *sampleSites = dynamic_cast<MarginalAncestralStateReconstruction *>(asr)->getAncestralSequences(true);
- vector<string> names = sampleSites->getSequencesNames();
- for (unsigned int j = 0; j < names.size(); j++)
- names[j] += "_" + TextTools::toString(i+1);
- sampleSites->setSequencesNames(names, false);
- SequenceContainerTools::append(*asSites, *sampleSites);
- delete sampleSites;
- }
- ApplicationTools::message->endLine();
- }
- else
- {
- asSites = asr->getAncestralSequences();
- }
- }
- else
- {
- asSites = asr->getAncestralSequences();
- }
-
- //Add existing sequence to output?
- bool addExtant = ApplicationTools::getBooleanParameter("asr.add_extant", bppancestor.getParams(), false, "", true, false);
- if (addExtant) {
- SequenceContainerTools::append(*asSites, *sites);
- }
-
- //Write output:
- SequenceApplicationTools::writeAlignmentFile(*asSites, bppancestor.getParams());
- delete asSites;
-
- delete asr;
delete alphabet;
delete sites;
if(model) delete model;
diff --git a/bppSuite/bppML.cpp b/bppSuite/bppML.cpp
index 5dbd077..10643e7 100644
--- a/bppSuite/bppML.cpp
+++ b/bppSuite/bppML.cpp
@@ -216,8 +216,6 @@ int main(int args, char** argv)
}
DiscreteRatesAcrossSitesTreeLikelihood* tl;
- string optimizeClock = ApplicationTools::getStringParameter("optimization.clock", bppml.getParams(), "no", "", true, false);
- ApplicationTools::displayResult("Clock", optimizeClock);
string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppml.getParams(), "no", "", true, false);
ApplicationTools::displayResult("Heterogeneous model", nhOpt);
@@ -228,8 +226,10 @@ int main(int args, char** argv)
SubstitutionModelSet* modelSet = 0;
DiscreteDistribution* rDist = 0;
- if (optimizeClock == "global")
+ if (optimizeTopo || nbBS > 0)
{
+ if (nhOpt != "no")
+ throw Exception("Topology estimation with NH model not supported yet, sorry :(");
model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
@@ -242,164 +242,140 @@ int main(int args, char** argv)
rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
}
if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new RHomogeneousClockTreeLikelihood(*tree, *sites, model, rDist, true, true);
+ tl = new NNIHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true, true);
else
- throw Exception("Molecular clock with Mixed model not supported yet, sorry :(");
+ throw Exception("Topology estimation with Mixed model not supported yet, sorry :(");
}
- else if (optimizeClock == "no")
+ else if (nhOpt == "no")
{
- if (optimizeTopo || nbBS > 0)
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
+ if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+ if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
{
- if (nhOpt != "no")
- throw Exception("Topology estimation with NH model not supported yet, sorry :(");
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
- if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
- if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
- {
- // Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
- }
- else
- {
- rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
- }
- if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new NNIHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true, true);
- else
- throw Exception("Topology estimation with Mixed model not supported yet, sorry :(");
+ // Markov-modulated Markov model!
+ rDist = new ConstantDistribution(1., true);
}
- else if (nhOpt == "no")
+ else
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
- if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
- if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
- {
- // Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
- }
- else
- {
- rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
- }
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
- ApplicationTools::displayResult("Likelihood recursion", recursion);
- if (recursion == "simple")
- {
- string compression = ApplicationTools::getStringParameter("likelihood.recursion_simple.compression", bppml.getParams(), "recursive", "", true, false);
- ApplicationTools::displayResult("Likelihood data compression", compression);
- if (compression == "simple")
- if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true, true, false);
- else
- tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, true, false);
-
- else if (compression == "recursive")
- if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true, true, true);
- else
- tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, true, true);
-
- else throw Exception("Unknown likelihood data compression method: " + compression);
- }
- else if (recursion == "double")
- {
+ rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
+ }
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ ApplicationTools::displayResult("Likelihood recursion", recursion);
+ if (recursion == "simple")
+ {
+ string compression = ApplicationTools::getStringParameter("likelihood.recursion_simple.compression", bppml.getParams(), "recursive", "", true, false);
+ ApplicationTools::displayResult("Likelihood data compression", compression);
+ if (compression == "simple")
if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
- tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true);
+ tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, false, true, false);
else
- tl = new DRHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true);
- }
- else throw Exception("Unknown recursion option: " + recursion);
+ tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, false, true, false);
+
+ else if (compression == "recursive")
+ if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
+ tl = new RHomogeneousTreeLikelihood(*tree, *sites, model, rDist, false, true, true);
+ else
+ tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, false, true, true);
+
+ else throw Exception("Unknown likelihood data compression method: " + compression);
}
- else if (nhOpt == "one_per_branch")
+ else if (recursion == "double")
{
- model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
- if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
- if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
- {
- // Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
- }
+ if (dynamic_cast<MixedSubstitutionModel*>(model) == 0)
+ tl = new DRHomogeneousTreeLikelihood(*tree, *sites, model, rDist, true);
else
- {
- rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
- }
- vector<double> rateFreqs;
- if (model->getNumberOfStates() != alphabet->getSize())
- {
- // Markov-Modulated Markov Model...
- unsigned int n = (unsigned int)(model->getNumberOfStates() / alphabet->getSize());
- rateFreqs = vector<double>(n, 1. / (double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
- // we should assume a rate distribution for the root also!!!
- }
+ tl = new DRHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true);
+ }
+ else throw Exception("Unknown recursion option: " + recursion);
+ }
+ else if (nhOpt == "one_per_branch")
+ {
+ model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, sites, bppml.getParams());
+ if (model->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+ if (model->getNumberOfStates() >= 2 * model->getAlphabet()->getSize())
+ {
+ // Markov-modulated Markov model!
+ rDist = new ConstantDistribution(1., true);
+ }
+ else
+ {
+ rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
+ }
+ vector<double> rateFreqs;
+ if (model->getNumberOfStates() != alphabet->getSize())
+ {
+ // Markov-Modulated Markov Model...
+ unsigned int n = (unsigned int)(model->getNumberOfStates() / alphabet->getSize());
+ rateFreqs = vector<double>(n, 1. / static_cast<double>(n)); // Equal rates assumed for now, may be changed later (actually, in the most general case,
+ // we should assume a rate distribution for the root also!!!
+ }
- bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", bppml.getParams(), false, "", false, false);
- FrequenciesSet* rootFreqs = 0;
- if (!stationarity)
- {
- rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppml.getParams(), rateFreqs);
- stationarity = !rootFreqs;
- }
- ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
+ bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", bppml.getParams(), false, "", false, false);
+ FrequenciesSet* rootFreqs = 0;
+ if (!stationarity)
+ {
+ rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, sites, bppml.getParams(), rateFreqs);
+ stationarity = !rootFreqs;
+ }
+ ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
- vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppml.getParams(), ',', "");
- for (unsigned int i = 0; i < globalParameters.size(); i++)
- ApplicationTools::displayResult("Global parameter", globalParameters[i]);
- modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters);
- model = 0;
-
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
- ApplicationTools::displayResult("Likelihood recursion", recursion);
- if (recursion == "simple")
- {
- if (modelSet->hasMixedSubstitutionModel())
- tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
- else
- tl = new RNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
- }
- else if (recursion == "double")
- {
- if (modelSet->hasMixedSubstitutionModel())
- throw Exception("Double recursion with non homogeneous mixed models is not implemented yet.");
- // tl = new DRNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
- else
- tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
- }
- else throw Exception("Unknown recursion option: " + recursion);
+ vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppml.getParams(), ',', "");
+ for (unsigned int i = 0; i < globalParameters.size(); i++)
+ ApplicationTools::displayResult("Global parameter", globalParameters[i]);
+ modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, globalParameters);
+ model = 0;
+
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ ApplicationTools::displayResult("Likelihood recursion", recursion);
+ if (recursion == "simple")
+ {
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet)!=NULL)
+ tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, dynamic_cast<MixedSubstitutionModelSet*>(modelSet), rDist, true, true);
+ else
+ tl = new RNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
}
- else if (nhOpt == "general")
+ else if (recursion == "double")
{
- modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppml.getParams());
- if (modelSet->getModel(0)->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
- if (modelSet->getNumberOfStates() >= 2 * modelSet->getAlphabet()->getSize())
- {
- // Markov-modulated Markov model!
- rDist = new ConstantDistribution(1., true);
- }
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet)!=NULL)
+ throw Exception("Double recursion with non homogeneous mixed models is not implemented yet.");
+ // tl = new DRNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
else
- {
- rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
- }
+ tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
+ }
+ else throw Exception("Unknown recursion option: " + recursion);
+ }
+ else if (nhOpt == "general")
+ {
+ modelSet = PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, sites, bppml.getParams());
+ if (modelSet->getModel(0)->getName() != "RE08") SiteContainerTools::changeGapsToUnknownCharacters(*sites);
+ if (modelSet->getNumberOfStates() >= 2 * modelSet->getAlphabet()->getSize())
+ {
+ // Markov-modulated Markov model!
+ rDist = new ConstantDistribution(1., true);
+ }
+ else
+ {
+ rDist = PhylogeneticsApplicationTools::getRateDistribution(bppml.getParams());
+ }
- string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
- ApplicationTools::displayResult("Likelihood recursion", recursion);
- if (recursion == "simple")
- {
- if (modelSet->hasMixedSubstitutionModel())
- tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
- else
- tl = new RNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
- }
- else if (recursion == "double")
- if (modelSet->hasMixedSubstitutionModel())
- throw Exception("Double recursion with non homogeneous mixed models is not implemented yet.");
- // tl = new DRNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
- else
- tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
- else throw Exception("Unknown recursion option: " + recursion);
+ string recursion = ApplicationTools::getStringParameter("likelihood.recursion", bppml.getParams(), "simple", "", true, false);
+ ApplicationTools::displayResult("Likelihood recursion", recursion);
+ if (recursion == "simple")
+ {
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet)!=NULL)
+ tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, dynamic_cast<MixedSubstitutionModelSet*>(modelSet), rDist, true, true);
+ else
+ tl = new RNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true, true);
}
- else throw Exception("Unknown option for nonhomogeneous: " + nhOpt);
+ else if (recursion == "double")
+ if (dynamic_cast<MixedSubstitutionModelSet*>(modelSet))
+ throw Exception("Double recursion with non homogeneous mixed models is not implemented yet.");
+ // tl = new DRNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
+ else
+ tl = new DRNonHomogeneousTreeLikelihood(*tree, *sites, modelSet, rDist, true);
+ else throw Exception("Unknown recursion option: " + recursion);
}
- else throw Exception("Unknown option for optimization.clock: " + optimizeClock);
+ else throw Exception("Unknown option for nonhomogeneous: " + nhOpt);
tl->initialize();
@@ -487,16 +463,8 @@ int main(int args, char** argv)
}
}
- if (optimizeClock == "global")
- {
- PhylogeneticsApplicationTools::optimizeParameters(
- dynamic_cast<DiscreteRatesAcrossSitesClockTreeLikelihood*>(tl), tl->getParameters(), bppml.getParams());
- }
- else
- {
- tl = dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(
- PhylogeneticsApplicationTools::optimizeParameters(tl, tl->getParameters(), bppml.getParams()));
- }
+ tl = dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(
+ PhylogeneticsApplicationTools::optimizeParameters(tl, tl->getParameters(), bppml.getParams()));
tree = new TreeTemplate<Node>(tl->getTree());
PhylogeneticsApplicationTools::writeTree(*tree, bppml.getParams());
@@ -526,6 +494,9 @@ int main(int args, char** argv)
out << "# Log likelihood = ";
out.setPrecision(20) << (-tl->getValue());
out.endLine();
+ out << "# Number of sites = ";
+ out.setPrecision(20) << sites->getNumberOfSites();
+ out.endLine();
out.endLine();
out << "# Substitution model parameters:";
out.endLine();
@@ -601,11 +572,12 @@ int main(int args, char** argv)
// Bootstrap:
- if (nbBS > 0 && optimizeClock != "no")
+ string optimizeClock = ApplicationTools::getStringParameter("optimization.clock", bppml.getParams(), "None", "", true, false);
+ if (nbBS > 0 && optimizeClock != "None")
{
ApplicationTools::displayError("Bootstrap is not supported with clock trees.");
}
- if (nbBS > 0 && optimizeClock == "no")
+ if (nbBS > 0 && optimizeClock == "None")
{
ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppml.getParams(), true);
diff --git a/bppSuite/bppSeqMan.cpp b/bppSuite/bppSeqMan.cpp
index 285756a..02f1b63 100644
--- a/bppSuite/bppSeqMan.cpp
+++ b/bppSuite/bppSeqMan.cpp
@@ -5,7 +5,7 @@
//
/*
-Copyright or © or Copr. CNRS
+Copyright or © or Copr. Bio++ Development Team
This software is a computer program whose purpose is to simulate sequence
data according to a phylogenetic tree and an evolutionary model.
@@ -59,6 +59,10 @@ using namespace std;
#include <Bpp/Seq/SequenceTools.h>
#include <Bpp/Seq/GeneticCode.all>
+//From PhylLib:
+#include <Bpp/Phyl/Tree.h>
+#include <Bpp/Phyl/App/PhylogeneticsApplicationTools.h>
+
using namespace bpp;
void help()
@@ -74,8 +78,8 @@ void help()
int main(int args, char** argv)
{
cout << "******************************************************************" << endl;
- cout << "* Bio++ Sequence Manipulator, version 0.4 *" << endl;
- cout << "* Author: J. Dutheil Last Modif. 07/02/11 *" << endl;
+ cout << "* Bio++ Sequence Manipulator, version 0.6 *" << endl;
+ cout << "* Author: J. Dutheil Last Modif. 21/12/11 *" << endl;
cout << "******************************************************************" << endl;
cout << endl;
@@ -197,15 +201,15 @@ int main(int args, char** argv)
// +-------------+
else if (cmdName == "Translate")
{
- if (!AlphabetTools::isNucleicAlphabet(sequences->getAlphabet()))
- throw Exception("Error in translation: alphabet is not of type 'nucleic'.");
+ if (!AlphabetTools::isCodonAlphabet(sequences->getAlphabet()))
+ throw Exception("Error in translation: alphabet is not of type 'codon'.");
GeneticCode* gc = NULL;
string gcstr = ApplicationTools::getStringParameter("code", cmdArgs, "Standard");
- gc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const NucleicAlphabet *>(sequences->getAlphabet()), gcstr);
+ gc = SequenceApplicationTools::getGeneticCode(dynamic_cast<const CodonAlphabet*>(sequences->getAlphabet())->getNucleicAlphabet(), gcstr);
OrderedSequenceContainer* sc = 0;
- if (aligned) sc = new VectorSiteContainer(sequences->getAlphabet());
- else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(sequences->getAlphabet()));
+ if (aligned) sc = new VectorSiteContainer(&AlphabetTools::PROTEIN_ALPHABET);
+ else sc = reinterpret_cast<OrderedSequenceContainer*>(new VectorSequenceContainer(&AlphabetTools::PROTEIN_ALPHABET));
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
Sequence* seq = gc->translate(sequences->getSequence(i));
@@ -223,9 +227,9 @@ int main(int args, char** argv)
VectorSequenceContainer* sc = new VectorSequenceContainer(sequences->getAlphabet());
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
- Sequence* seq = SequenceTools::removeGaps(sequences->getSequence(i));
+ auto_ptr<Sequence> seq(sequences->getSequence(i).clone());
+ SequenceTools::removeGaps(*seq);
sc->addSequence(*seq);
- delete seq;
}
delete sequences;
sequences = sc;
@@ -273,10 +277,40 @@ int main(int args, char** argv)
// +--------------+
else if (cmdName == "RemoveStops")
{
- SiteContainer* sites = dynamic_cast<SiteContainer *>(sequences);
+ SiteContainer* sites = dynamic_cast<SiteContainer*>(sequences);
+ if (!sites)
+ {
+ VectorSequenceContainer* sc = new VectorSequenceContainer(sequences->getAlphabet());
+ for (unsigned int i = 0; i < sequences->getNumberOfSequences(); ++i)
+ {
+ auto_ptr<Sequence> seq(sequences->getSequence(i).clone());
+ SequenceTools::removeStops(*seq);
+ sc->addSequence(*seq);
+ }
+ delete sequences;
+ sequences = sc;
+ } else {
+ VectorSiteContainer* sc = new VectorSiteContainer(sequences->getAlphabet());
+ for (unsigned int i = 0; i < sequences->getNumberOfSequences(); ++i)
+ {
+ auto_ptr<Sequence> seq(sequences->getSequence(i).clone());
+ SequenceTools::replaceStopsWithGaps(*seq);
+ sc->addSequence(*seq);
+ }
+ delete sequences;
+ sequences = sc;
+ }
+ }
+
+ // +--------------+
+ // | Remove stops |
+ // +--------------+
+ else if (cmdName == "RemoveColumnsWithStop")
+ {
+ SiteContainer* sites = dynamic_cast<SiteContainer*>(sequences);
if (!sites)
{
- throw Exception("'RemoveStops' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
+ throw Exception("'RemoveColumnsWithStop' can only be used on alignment. You may consider using the 'CoerceToAlignment' command.");
}
for (unsigned int i = sites->getNumberOfSites(); i > 0; i--)
@@ -297,7 +331,12 @@ int main(int args, char** argv)
for (unsigned int i = 0; i < sequences->getNumberOfSequences(); i++)
{
BasicSequence seq = sequences->getSequence(i);
+ unsigned int len = seq.size();
SequenceTools::getCDS(seq, false, true, true, false);
+ if (aligned) {
+ for (unsigned int c = seq.size(); c < len; ++c)
+ seq.addElement(seq.getAlphabet()->getGapCharacterCode());
+ }
sc->addSequence(seq, false);
}
delete sequences;
@@ -355,7 +394,7 @@ int main(int args, char** argv)
{
map<int, double> freqs;
SiteTools::getFrequencies(sites->getSite(i - 1), freqs);
- if (freqs[-1] >= gapFreq) sites->deleteSite(i - 1);
+ if (freqs[-1] > gapFreq) sites->deleteSite(i - 1);
}
}
else
@@ -388,6 +427,41 @@ int main(int args, char** argv)
delete sequences;
sequences = sc;
}
+ // +------------------+
+ // | GetCodonPosition |
+ // +------------------+
+ else if (cmdName == "GetCodonPosition")
+ {
+ unsigned int pos = ApplicationTools::getParameter<unsigned int>("position", cmdArgs, 3);
+ OrderedSequenceContainer* sc = dynamic_cast<OrderedSequenceContainer*>(SequenceContainerTools::getCodonPosition(*sequences, pos - 1));
+ delete sequences;
+ if (aligned) {
+ sequences = new VectorSiteContainer(*sc);
+ delete sc;
+ } else {
+ sequences = sc;
+ }
+ }
+ // +-----------------+
+ // | FilterFromTree |
+ // +-----------------+
+ else if (cmdName == "FilterFromTree")
+ {
+ auto_ptr<Tree> tree(PhylogeneticsApplicationTools::getTree(cmdArgs, ""));
+ vector<string> names = tree->getLeavesNames();
+ OrderedSequenceContainer* reorderedSequences = 0;
+ if (aligned) {
+ reorderedSequences = new VectorSiteContainer(sequences->getAlphabet());
+ } else {
+ reorderedSequences = new VectorSequenceContainer(sequences->getAlphabet());
+ }
+ for (size_t i = 0; i < names.size(); ++i) {
+ reorderedSequences->addSequence(sequences->getSequence(names[i]), false);
+ }
+ delete sequences;
+ sequences = reorderedSequences;
+ }
+
else throw Exception("Unknown action: " + cmdName);
}
diff --git a/debian/changelog b/debian/changelog
index cd1de72..6516fcc 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+bppsuite (0.7.0-1) unstable; urgency=low
+
+ * Several program improvements (more models, options, etc.)
+ * New program bpp Alignment scores.
+
+ -- Julien Dutheil <julien.dutheil at univ-montp2.fr> Wed, 15 Feb 2012 09:17:00 +0100
+
bppsuite (0.6.2-1) unstable; urgency=low
* RFP: Bio++ -- The Bio++ bioinformatics libraries. (Closes: #616373).
diff --git a/debian/control b/debian/control
index 5053b90..f7c1347 100644
--- a/debian/control
+++ b/debian/control
@@ -4,12 +4,12 @@ Priority: optional
Maintainer: Loic Dachary <loic at dachary.org>
Uploaders: Julien Dutheil <julien.dutheil at univ-montp2.fr>
Build-Depends: debhelper (>= 5), cmake (>= 2.6), dpkg (>= 1.15.4) | install-info, texinfo,
- libbpp-phyl-dev (>= 2.0.1)
+ libbpp-phyl-dev (>= 2.0.3)
Standards-Version: 3.9.1
Package: bppsuite
Architecture: any
-Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-phyl9 (>= 2.0.1)
+Depends: ${shlibs:Depends}, ${misc:Depends}, libbpp-phyl9 (>= 2.0.3)
Description: Bio++ program suite
Includes programs:
- BppML for maximum likelihood analysis,
@@ -21,4 +21,6 @@ Description: Bio++ program suite
- BppConsense for building consensus tree and computing bootstrap values,
- BppPhySamp for phylogenetic sampling,
- BppReRoot for tree rerooting.
+ - BppTreeDraw for tree drawing.
+ - BppAlnScore for comparing alignments and computing alignment scores.
diff --git a/doc/bppsuite.texi b/doc/bppsuite.texi
index c0c67ae..48af52f 100644
--- a/doc/bppsuite.texi
+++ b/doc/bppsuite.texi
@@ -1,7 +1,7 @@
\input texinfo @c -*-texinfo-*-
@c %**start of header
@setfilename bppsuite.info
- at settitle BppSuite Manual 0.6.0
+ at settitle BppSuite Manual 0.7.0
@documentencoding UTF-8
@afourpaper
@dircategory Science Biology Genetics
@@ -15,16 +15,15 @@
* bppphysamp: (bppphysamp) Bio++ Phylogenetic Sampler.
* bppreroot: (bppreroot) Bio++ Serial Tree Re-rooting.
* bppseqman: (bppseqman) Bio++ Sequences Manipulation.
+* bppalnscore: (bppalnscore) Bio++ Alignment Scoring.
* bpptreedraw: (bpptreedraw) Bio++ Tree Drawing.
@end direntry
@c %**end of header
- at include version.texi
-
@copying
-This is the manual of the Bio++ Program Suite, version @value{VERSION}, @value{UPDATED}.
+This is the manual of the Bio++ Program Suite, version 0.7.0.
-Copyright @copyright{} 2007, 2008, 2009, 2010, 2011 Bio++ development team
+Copyright @copyright{} 2007, 2008, 2009, 2010, 2011, 2012 Bio++ development team
@end copying
@titlepage
@@ -64,6 +63,7 @@ Common options encountered in several programs.
* Model:: Setting up a substitution model.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
+* WritingTrees:: Writing trees to files.
Bio++ Program Suite Reference
@@ -76,6 +76,7 @@ Bio++ Program Suite Reference
* bppphysamp:: Bio++ Phylogenetic Sampler.
* bppreroot:: Bio++ Serial Tree Re-rooting.
* bppseqman:: Bio++ Sequences Manipulation.
+* bppalnscore:: Bio++ Alignment Scoring.
* bpptreedraw:: Bio++ Tree Drawing.
@end detailmenu
@@ -240,15 +241,15 @@ It is possible to recall anywhere the value of an option by using $(parameter).
@example
optimization.topology.algorithm = NNI
optimization.topology.algorithm_nni.method = phyml
-output.tree = MyData_$(optimization.topology.algorithm)_$(optimization.topology.algorithm_nni.method).dnd
+output.tree.file = MyData_$(optimization.topology.algorithm)_$(optimization.topology.algorithm_nni.method).dnd
@end example
@end cartouche
You can use this syntax to define global variables:
@cartouche
@example
data=MyData
-sequence.file=$(data).fasta
-input.tree=$(data).dnd
+input.sequence.file=$(data).fasta
+input.tree.file=$(data).dnd
output.infos=$(data).infos
@end example
@end cartouche
@@ -261,8 +262,8 @@ For instance:
@example
#Option file 1:
param=options2.bpp
-sequence.file=$(data).fasta
-sequence.format=Fasta
+input.sequence.file=$(data).fasta
+input.sequence.format=Fasta
@end example
@end cartouche
@cartouche
@@ -286,6 +287,7 @@ data=LSU
* Model:: Setting up a substitution model.
* Estimation:: Estimating parameters by maximizing a likelihood function.
* WritingSequences:: Writing sequences/alignments to files.
+* WritingTrees:: Writing trees to files.
@end menu
@node Sequences, Tree, Common, Common
@@ -293,10 +295,15 @@ data=LSU
@table @command
@item alphabet =
-@{DNA|RNA|Protein|Word(letter=@{DNA|RNA|Protein@},length=@{int@})|
+@{DNA|RNA|Protein|Binary|Word(letter=@{DNA|RNA|Protein@},length=@{int@})|
Codon(letter=@{DNA|RNA@}, type=@{Standard|EchinodermMitochondrial|InvertebrateMitochondrial|\
VertebrateMitochondrial@})@}
-The alphabet to use when reading sequences.
+The alphabet to use when reading sequences. DNA and RNA alphabet can in addition take an argument:
+
+ at table @command
+ at item bangAsgap=@{bool@}
+Tells whether the exclamation mark should be considered as a gap character. The default is to consider it as an unknown character such as 'N' or '?'.
+ at end table
@item input.sequence.file=@{path@}
The sequence file to use. Depending on the program, these sequences have or do not have to be aligned.
@@ -310,8 +317,10 @@ The format is a function, with optional parameters:
@table @command
- at item Fasta()
+ at item Fasta(extended=@{bool@}, strictNames=@{bool@})
The fasta format.
+The argument @command{extended}, which defaults to 'no', enables the HUPO-PSI extension of the format.
+The argument @command{strict_names}, which defaults to 'no', specifies that only the first word in the fasta header is used as the sequence name, the rest of the header being considered as comments.
@item Mase(siteSelection=@{chars@})
The Mase format (as read by Seaview and Phylo_win for instance), with an optional site selection name.
@@ -379,7 +388,7 @@ original site numbering will be used in the output files (if relevant).
@item input.tree.file = @{path@}
The phylogenetic tree file to use.
- at item input.tree.format = @{Newick|Nexus@}
+ at item input.tree.format = @{Newick|Nexus|NHX@}
The format of the input tree file.
@end table
@@ -391,7 +400,7 @@ The corresponding options are then:
@item input.trees.file = @{path@}
The file containing multiple trees.
- at item input.trees.format = @{Newick|Nexus@}
+ at item input.trees.format = @{Newick|Nexus|NHX@}
The format of the input tree file.
@end table
@@ -434,6 +443,28 @@ be built. The substitution model is a function, potentially including
parameters. The following table lists the set of usable functions, and
their parameters.
+Many models have a set of optional parameters denoted here as
+"equilibrium frequencies" that are used to initialize the parameters
+of the model related with the equilibrium frequencies. These options
+are:
+
+ at table @command
+
+ at item initFreqs=values(@{real]0,1[@},...,@{real]0,1[@})
+The equilibrium frequency is set equal (as much as possible) to the
+given frequencies. Those frequencies are given in the same order as
+the alphabet, and they must sum to 1.
+
+ at item initFreqs=observed
+The equilibrium frequency is set equal (as much as possible) to the
+observed frequencies.
+
+ at item initFreqs.observedPseudoCount=@{integer@}
+A pseudocount integer added to all counts of letters (or words), when
+the frequencies are computed from observed data.
+
+ at end table
+
@subsubsection Nucleotide models
@table @command
@@ -441,39 +472,37 @@ their parameters.
@item JC69
The Jukes and Cantor model. This model has no additional parameter.
- at item K80(kappa=@{real>0@})
-The Kimura 2 parameters model. @var{kappa} is the transition over transversion ratio.
-
- at item F84(kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-Felsenstein's 1984 substitution model, with
-transition/transversion ratio and 4 distinct equilibrium frequencies,
-set using three independent parameters: @var{theta} is the GC content,
- at var{theta1} is the proportion of G / (G + C) and @var{theta2} is the
-proportion of A / (A + T or U). The @var{useObservedFreqs} option set
-the @var{theta}s parameters according to the observed counts in the
-data set, and @var{useObservedFreqs.pseudoCount} is a quantity,
-defaulting to 0, that can be used in case some counts are zero, on
-small data sets for instance. The corrected values are computed as:
- at tex
-$$\pi_i = {f_i + \psi \over 4\cdot\psi + \sum_j f_j}$$
- at end tex
-
- at item HKY85(kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-Hasegawa, Kishino and Yano 1985's substitution model.
-The model is similar to @command{F84}, but with a different implementation.
-The @var{kappa} parameter used here is comparable to the one in @command{K80}.
-
- at item T92(kappa=@{real>0@}, theta=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-Tamura 1992's model for nucleotides, similar to @command{HKY85}, yet assuming that the frequencies of A = T/U and G = C.
-
- at item TN93(kappa1=@{real>0@}, kappa2=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-Tamura and Nei 1993's model, similar to @command{HKY85}, but allowing for two distinct transition/transversion ratios.
-
- at item GTR(a=@{real>0@}, b=@{real>0@}, c=@{real>0@}, d=@{real>0@}, e=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-The General Time-Reversible substitution model.
-Parameters @var{a}, @var{b}, @var{c}, @var{d}, @var{e} are the entries of the exchangeability matrix.
-
- at item L95(beta=@{real>0@}, gamma=@{real>0@}, delta=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
+ at item K80([kappa=@{real>0@}])
+The Kimura 2 parameters model. @var{kappa} is the transition over
+transversion ratio. Default: @var{kappa}=1
+
+ at item F84([kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@},theta2=@{real]0,1[@} ,"equilibrium frequencies"] )
+Felsenstein's 1984 substitution model, with transition/transversion
+ratio and 4 distinct equilibrium frequencies, set using three
+independent parameters: @var{theta} is the GC content, @var{theta1} is
+the proportion of G / (G + C) and @var{theta2} is the proportion of A
+/ (A + T or U).
+
+ at item HKY85([kappa=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
+Hasegawa, Kishino and Yano 1985's substitution model. The model is
+similar to @command{F84}, but with a different implementation. The
+ at var{kappa} parameter used here is comparable to the one in
+ at command{K80}.
+
+ at item T92([kappa=@{real>0@}, theta=@{real]0,1[@} ,"equilibrium frequencies"])
+Tamura 1992's model for nucleotides, similar to @command{HKY85}, yet
+assuming that the frequencies of A = T/U and G = C.
+
+ at item TN93([kappa1=@{real>0@}, kappa2=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
+Tamura and Nei 1993's model, similar to @command{HKY85}, but allowing
+for two distinct transition/transversion ratios.
+
+ at item GTR([a=@{real>0@}, b=@{real>0@}, c=@{real>0@}, d=@{real>0@}, e=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
+The General Time-Reversible substitution model. Parameters @var{a},
+ at var{b}, @var{c}, @var{d}, @var{e} are the entries of the
+exchangeability matrix.
+
+ at item L95([beta=@{real>0@}, gamma=@{real>0@}, delta=@{real>0@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@} ,"equilibrium frequencies"])
The strand-symmetric model of Lobry 1995, for nucleotides.
@end table
@@ -497,57 +526,72 @@ Protein substitution model, from Whelan & Goldman 2001.
@item LG08
Protein substitution model, from Le & Gascuel 2008.
- at item LLG08_EX2(relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@})
+ at item LLG08_EX2([relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
the meaning of the variables in the Mixture model below.
- at item LLG08_EX3(relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@})
+ at item LLG08_EX3([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
the meaning of the variables in the Mixture model below.
- at item LLG08_EHO(relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@})
+ at item LLG08_EHO([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
the meaning of the variables in the Mixture model below.
- at item LLG08_UL2(relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@})
+ at item LLG08_UL2([relrate1=@{real]0,1[@}, relproba1=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
the meaning of the variables in the Mixture model below.
- at item LLG08_UL3(relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@})
+ at item LLG08_UL3([relrate1=@{real]0,1[@}, relrate2=@{real]0,1[@}, relproba1=@{real]0,1[@}, relproba2=@{real]0,1[@}])
Protein substitution model, from Le, Lartillot & Gascuel 2008. See
the meaning of the variables in the Mixture model below.
- at item DSO78+F(theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
+ at item DSO78+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ... ,"equilibrium frequencies"])
Protein substitution model, using the dcmutt implementation of Kosiol
and Goldman 2005 and free equilibrium frequencies. The @var{thetaX}
are frequencies parameters, where X is 1 to 19. Parameter @var{theta1}
is the proportion of A, @var{theta2} is the proportion of R over
(1-A), @var{theta3} the proportion of N over (1-A-R), etc.
- at item JTT92+F(theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
+ at item JTT92+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, using the dcmutt implementation of Kosiol
and Goldman 2005 and free equilibrium frequencies.
- at item WAG01+F(theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
+ at item WAG01+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, from Whelan & Goldman 2001, and free
equilibrium frequencies.
- at item LG08+F(theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
+ at item LG08+F([theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
Protein substitution model, from Le & Gascuel 2008, and free
equilibrium frequencies.
@item Empirical(name=@{chars@}, file=@{path@})
-Build a protein substitution model from a file in PAML format, and use 'name' as a namespace for parameters.
+Build a protein substitution model from a file in PAML format, and use
+'name' as a namespace for parameters.
- at item Empirical+F(name=@{chars@}, file=@{path@}, theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., useObservedFreqs=@{boolean@}, useObservedFreqs.pseudoCount=@{int>0@})
-Build a protein substitution model from a file in PAML format, and use free equilibrium frequencies. 'name' will be used as a parameter namespace, including for frequencies.
+ at item Empirical+F(name=@{chars@}, file=@{path@}, [theta=@{real]0,1[@}, theta1=@{real]0,1[@}, theta2=@{real]0,1[@}, ..., "equilibrium frequencies"])
+Build a protein substitution model from a file in PAML format, and use
+free equilibrium frequencies. 'name' will be used as a parameter
+namespace, including for frequencies.
+
+ at end table
+
+ at subsubsection Miscellaneous models
+
+ at table @command
+ at item Binary([kappa=@{real>0@} ,"equilibrium frequencies"])
+Build the model on binary alphabet, where @var{kappa} is the relative
+proportion of 1 over 0 in the equilibrium distribution. Default:
+ at var{kappa}=1.
@end table
@subsubsection Codon models
-Standard codon models: the optional @var{genetic_code} argument describes the genetic code. If
-it is not given, the one related with the alphabet is used. The several values available are described below.
+Standard codon models: the optional @var{genetic_code} argument
+describes the genetic code. If it is not given, the one related with
+the alphabet is used. The several values available are described
+below.
@itemize
@item EchinodermMitochondrialGeneticCode
@@ -557,9 +601,11 @@ it is not given, the one related with the alphabet is used. The several values a
@item YeastMitochondrialGeneticCode
@end itemize
-The next codon models also take as argument a @var{frequencies} option
-specifying the equilirium frequencies of the model. Any frequencies description can be used here, but the syntax also supports
-options similar to the ones used in the PAML software:
+The next codon models also take as argument a @var{frequencies} option
+specifying the equilibrium frequencies of the model. Any frequencies
+description can be used here, but the syntax also supports options
+similar to the ones used in the PAML software:
+
@itemize
@item F0: all frequencies are assumed to be fixed and equal to 1/61, 0 for stop codons.
@item F1X4: 4 distinct frequencies are used, with parameters theta, theta1, theta2 (@xref{Frequencies sets}, ``Full'' method).
@@ -568,38 +614,38 @@ options similar to the ones used in the PAML software:
@end itemize
The same words can be used to specify root frequencies for codon
-models, in the case of non reversibility.
+models, in the case of non stationarity.
@table @command
- at item GY94([genetic_code=@{genetic code description@} , kappa=@{real>0@}, V=@{real>0@}])
+ at item GY94([genetic_code=@{genetic code description@}, kappa=@{real>0@}, V=@{real>0@}, "equilibrium frequencies"])
Goldman and Yang (1994) substitution model for codons (default values:
@var{kappa}=1 and @var{V}=10000).
- at item MG94([genetic_code=@{genetic code description@}, rho=@{real>0@}])
+ at item MG94([genetic_code=@{genetic code description@}, rho=@{real>0@}, "equilibrium frequencies"])
Muse and Gaut (1994) substitution model for codons (default values:
@var{rho}=1).
- at item YN98([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}])
+ at item YN98([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}, "equilibrium frequencies"])
Yang and Nielsen (1998) substitution model for codons (default values:
@var{kappa}=1 and @var{omega}=1).
- at item YNGKP_M0([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}])
+ at item YNGKP_M0([genetic_code=@{genetic code description@}, kappa=@{real>0@}, omega=@{real>0@}, "equilibrium frequencies"])
The M0 model of PAML, ie the same as YN98.
- at item YNGKP_M1([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega=@{real>0@}, p0=@{real>0 and <1 @}])
+ at item YNGKP_M1([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega=@{real>0@}, p0=@{real>0 and <1 @}, "equilibrium frequencies"])
The M1a model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000) (default values: @var{kappa}=1, @var{p0}=0.5,
@var{omega}=0.5).
- at item YNGKP_M2([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega0=@{real>0 and <1@}, theta1=@{real>0 and <1 @}], omega1=@{real>1@}, theta2=@{real>0 and <1 @}])
+ at item YNGKP_M2([genetic_code=@{genetic code description@},kappa=@{real>0@}, omega0=@{real>0 and <1@}, theta1=@{real>0 and <1 @}, omega1=@{real>1@}, theta2=@{real>0 and <1 @}, "equilibrium frequencies"])
The M2a model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with p0=theta1 and
p1=(1-theta1)*theta2 (default values: @var{kappa}=1, @var{theta1}=0.33333,
@var{theta2}=0.5, @var{omega0}=0.5, @var{omega2}=0.5).
- at item YNGKP_M3(genetic_code=@{genetic code description@}, n=@{integer>0@}, kappa=@{real>0@}, omega0=@{real>0 and <1@}, delta1=@{real>0@}, ..., delta at var{n-1}=@{real>0@}, theta1=@{real>0 and <1 @}, ..., theta at var{n-1}1=@{real>0 and <1 @}])
+ at item YNGKP_M3([genetic_code=@{genetic code description@}, n=@{integer>0@}, kappa=@{real>0@}, omega0=@{real>0 and <1@}, delta1=@{real>0@}, ..., delta at var{n-1}=@{real>0@}, theta1=@{real>0 and <1 @}, ..., theta at var{n-1}1=@{real>0 and <1 @}, "equilibrium frequencies"])
The M3 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with @var{n} discrete values, with p0=theta1
@@ -607,13 +653,13 @@ and pk=(1-theta1)*...*(1-thetak)*theta(k+1), and
omegak=omega0+delta1+....+deltak (default values: @var{n}=3,
@var{kappa}=1, @var{thetak}=1/(n-k+1), @var{omega0}=0.5, @var{deltak}=0.5).
- at item YNGKP_M7(n=@{integer>0@}, [genetic_code=@{genetic code description@},kappa=@{real>0@}, p=@{real>1@}, q=@{real>1 @}])
+ at item YNGKP_M7(n=@{integer>0@}, [genetic_code=@{genetic code description@},kappa=@{real>0@}, p=@{real>1@}, q=@{real>1 @}, "equilibrium frequencies"])
The M7 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with the Beta distribution discretized in @var{n}
classes (default values: @var{kappa}=1, @var{p}=2, @var{q}=2).
- at item YNGKP_M7(n=@{integer>0@}, [genetic_code=@{genetic code description@},kappa=@{real>0@}, omegas=@{real>1@}, p0=@{real>0@},p=@{real>1@}, q=@{real>1 @}])
+ at item YNGKP_M8(n=@{integer>0@}, [genetic_code=@{genetic code description@},kappa=@{real>0@}, omegas=@{real>1@}, p0=@{real>0@},p=@{real>1@}, q=@{real>1 @}, "equilibrium frequencies"])
The M8 model of PAML, see Yang, Z., R. Nielsen, N. Goldman, and A.-M.
K. Pedersen (2000), with the Beta distribution discretized in @var{n}
@@ -622,115 +668,112 @@ classes (default values: @var{kappa}=1, @var{p}=2, @var{q}=2,
@end table
-It is also possible to setup more specific models, by specifying a nucleotide model for each position.
-Model parameters names then take the form of CodonModel.<position set>_<position model name>.<position specific parameter name>.
-
- at table @command
-
- at item CodonNeutral(model=@{model name@} [, relrate1=@{real>0@}, relrate2=@{real>0@}])
-or
- at item CodonNeutral(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, relrate1=@{real>0@}, relrate2=@{real>0@}])
+It is also possible to setup more specific models, by specifying a
+nucleotide model for each position. Model parameters names then take
+the form of <codon model name>.<position set>_<position model name>.<position specific parameter name>.
-Substitution model on codons. The arguments @var{model} and
- at var{model@{i@}} are for descriptions of models on bases. The alphabet
-must be a codon alphabet.
+In the following models, the arguments @var{model} and
+ at var{model@{i@}} are for descriptions of models on bases.
-If the argument is @var{model}, the @emph{same} single site model is
+ at itemize
+ at item If the argument is @var{model}, the @emph{same} single site model is
used on all positions (ie the parameters are shared between all
positions).
-
-If the arguments are @var{model1}, @var{model2}, @var{model3}, each
+ at item If the arguments are @var{model1}, @var{model2}, @var{model3}, each
single site model stands for a single-site substitution model. In that
case, all single site models parameters are position dependent.
+ at end itemize
Each single site model is normalized and the substitution rates
between codons that differ on more than one letter are null.
-Arguments @var{relrate@{i@}} stands for the relative substitution rates
-of the sites. Default: @var{relrate@{i@}=1/@{4-i@}}, such that the rate
-of each site is 1/3.
-
The generator is first computed with these models and parameters on
the whole triplet alphabet, and then the substitution rates to and
from stop codons are set to zero and the generator is normalized with
this modification.
+ at table @command
+
+ at item CodonRate(model=@{model name@} [, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+
+or
+
+ at item CodonRate(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+
+Substitution model on codons with position specific evolution rates.
+
+Arguments @var{relrate@{i@}} stands for the relative substitution rates
+of the sites. Default: @var{relrate@{i@}=1/@{4-i@}}, such that the rate
+of each site is 1/3.
+
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonNeutral(model=T92)
+model=CodonRate(model=T92)
@end example
builds a model on codons, such all sites follow the same T92 model.
-The parameters names are @var{CodonNeutral.123_T92.kappa},
- at var{CodonNeutral.relrate1}, @var{CodonNeutral.relrate2}.
+The parameters names are @var{CodonRate.123_T92.kappa},
+ at var{CodonRate.relrate1}, @var{CodonRate.relrate2}.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonNeutral(model1=T92, model2=T92, model3=JC69)
+model=CodonRate(model1=T92, model2=T92, model3=JC69)
@end example
builds a model on codons, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
-parameters names are @var{CodonNeutral.1_T92.kappa},
- at var{CodonNeutral.2_T92.kappa}, @var{CodonNeutral.relrate1},
- at var{CodonNeutral.relrate2}, and can be initialized as is:
+parameters names are @var{CodonRate.1_T92.kappa},
+ at var{CodonRate.2_T92.kappa}, @var{CodonRate.relrate1},
+ at var{CodonRate.relrate2}, and can be initialized as is:
@example
-model=CodonNeutral(model1=T92, model2=T92, model3=JC69,\
- 1_T92.theta=0.5, 1_T92.kappa=2.0, 2_T92.theta=0.4, 2_T92.kappa=2.0)
+model=CodonRate(model1=T92(theta=0.5, kappa=2), \
+ model2=T92(theta=0.4, kappa=2), model3=JC69)
@end example
- at item CodonAsynonymous(model=@{model name@}[, genetic_code=@{genetic code description@}, beta=@{real>0@}])
+ at item CodonDistance(model=@{model name@}[, genetic_code=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
or
- at item CodonAsynonymous(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, geneticcode=@{genetic code description@}, beta=@{real>0@}])
-
-substitution model on codons.
+ at item CodonDistance(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}[, geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
-The arguments @var{model} and @var{model@{i@}} are for descriptions of
-models on bases. The alphabet must be a codon alphabet.
-
-If the argument is @var{model}, the @emph{same} single site model is
-used on all positions (ie the parameters are shared between all
-positions).
-
-If the arguments are @var{model1}, @var{model2}, @var{model3}, each
-single site model stands for a single-site substitution model. In that
-case, all single site models parameters are position dependent.
-
-Each single site model is normalized and the substitution rates
-between codons that differ on more than one letter are null.
-
-In addition to these models, the optional @var{geneticcode} argument
-describes the genetic code. If it is not given, the one related with
-the alphabet is used. The several values available are described
-below.
+Substitution model on codons that takes into account the difference
+between synonymous and non-synonymous substitutions.
Optional argument @var{beta} is the ratio between non-synonymous
substitution rate and synonymous substitution rate. Default value: 1.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonAsynonymous(model=T92)
+model=CodonDistance(model=T92)
@end example
builds a model on codons, such all sites follow the same T92 model.
-The parameters names are @var{CodonAsynonymous.123_T92.kappa} and
- at var{CodonAsynonymous.beta}.
+The parameters names are @var{CodonDistance.123_T92.kappa} and
+ at var{CodonDistance.beta}.
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonNeutral(model1=T92, model2=T92, model3=JC69)
+model=CodonDistance(model1=T92, model2=T92, model3=JC69)
@end example
builds a model on codons, such that first and second sites follow
independent T92 models, and third site follows a JC69 model. Then the
-parameters names are @var{CodonAsynonymous.1_T92.kappa},
- at var{CodonAsynonymous.2_T92.kappa}, @var{CodonAsynonymous.beta}.
+parameters names are @var{CodonDistance.1_T92.kappa},
+ at var{CodonDistance.2_T92.kappa}, @var{CodonDistance.beta}.
+
+ at item CodonRateFrequencies(model=@{model name@}, frequencies=@{frequencies set description@}[, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+or
+ at item CodonRateFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [, relrate1=@{real>0@}, relrate2=@{real>0@}, "equilibrium frequencies"])
+
- at item CodonNeutralFrequencies(frequencies=@{frequencies set description@} [, relrate1=@{real>0@}, relrate2=@{real>0@}])
+Substitution model on codons with position specific evolution rates,
+where the substitution rates are multiplied by the frequency of the
+target codon in the given frequencies set.
-substitution model on codons. The exchangeability model on each site
-is the same K80 model, and the equilibrium distribution of the model
-is description by the @var{frequencies} argument. See the description
-of the Frequencies Set below.
+This model should be used with nucleotidic models whose equilibrium
+distribution is fixed, and does not depend on the parameters.
+Otherwise there may be problems of identifiability of the parameters.
+
+The multiplicative distribution of the model is described by the
+ at var{frequencies} argument. See the description of the Frequencies Set
+below.
Each single site model is normalized and the substitution rates
between codons that differ on more than one letter are null.
@@ -739,53 +782,69 @@ Arguments @var{relrate@{i@}} stands for the relative substitution rates
of the sites. Default: @var{relrate@{i@}=1/@{4-i@}}, such that the rate
of each site is 1/3.
-The generator is first computed with these model and distribution on
-the whole triplet alphabet, and then the substitution rates to and
-from stop codons are set to zero and the generator is normalized with
-this modification.
-
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonNeutralFrequencies(frequencies=Full())
+model=CodonRateFrequencies(frequencies=Full())
@end example
-has parameters @var{CodonNeutralFrequencies.123_K80.kappa},
- at var{CodonNeutralFrequencies.Full.theta_1}, ...,
- at var{CodonNeutralFrequencies.Full.theta_60},
- at var{CodonNeutralFrequencies.relrate1},
- at var{CodonNeutralFrequencies.relrate2}.
+has parameters @var{CodonRateFrequencies.123_K80.kappa},
+ at var{CodonRateFrequencies.Full.theta_1}, ...,
+ at var{CodonRateFrequencies.Full.theta_60},
+ at var{CodonRateFrequencies.relrate1},
+ at var{CodonRateFrequencies.relrate2}.
- at item CodonAsynonymousFrequencies(frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}])
+ at item CodonDistanceFrequencies(model=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
-substitution model on codons. The exchangeability model on each site
-is the same K80 model, and the equilibrium distribution of the model
-is description by the @var{frequencies} argument. See the description
-of the Frequencies Set below.
+or
-Each single site model is normalized and the substitution rates
-between codons that differ on more than one letter are null.
+ at item CodonDistanceFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
-In addition to these models, the optional @var{geneticcode} argument
-describes the genetic code. If it is not given, the one related with
-the alphabet is used. The several values available are described
+Substitution model on codons that takes into account the difference
+between synonymous and non-synonymous substitutions. Moreover, the
+substitution rates are multiplied by the frequency of the target codon
+in the given frequencies set.
+
+This model should be used with nucleotide models whose equilibrium
+distribution is fixed, and does not depend on the parameters.
+Otherwise there may be problems of identifiability of the parameters.
+
+The multiplicative distribution of the model is described by the
+ at var{frequencies} argument. See the description of the Frequencies Set
below.
Optional argument @var{beta} is the ratio between non-synonymous
substitution rate and synonymous substitution rate. Default value: 1.
-The generator is first computed with these model and distribution on
-the whole triplet alphabet, and then the substitution rates to and
-from stop codons are set to zero and the generator is normalized with
-this modification.
-
@example
alphabet=Codon(letter=DNA, type=Standard)
-model=CodonAsynonymousFrequencies(frequencies=Full())
+model=CodonDistanceFrequencies(frequencies=Full())
@end example
-has parameters @var{CodonAsynonymousFrequencies.012_T92.kappa},
- at var{CodonAsynonymousFrequencies.Full.theta_1}, ...,
- at var{CodonAsynonymousFrequencies.Full.theta_60},
- at var{CodonAsynonymousFrequencies.beta}.
+has parameters @var{CodonDistanceFrequencies.012_T92.kappa},
+ at var{CodonDistanceFrequencies.Full.theta_1}, ...,
+ at var{CodonDistanceFrequencies.Full.theta_60},
+ at var{CodonDistanceFrequencies.beta}.
+ at item CodonDistancePhaseFrequencies(model=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+
+or
+
+ at item CodonDistancePhaseFrequencies(model1=@{model name@}, model2=@{model name@}, model3=@{model name@}, frequencies=@{frequencies set description@} [geneticcode=@{genetic code description@}, beta=@{real>0@}, "equilibrium frequencies"])
+
+Substitution model on codons that takes into account the difference
+between synonymous and non-synonymous substitutions. Moreover, the
+substitution rates are multiplied by the product of the frequencies of
+the changed nucleotides -- conditioned on the phase -- in the given
+frequencies set.
+
+This model should be used with nucleotide models whose equilibrium
+distribution is fixed, and does not depend on the parameters.
+Otherwise there may be problems of identifiability of the parameters.
+
+The multiplicative distribution of the model is described by the
+ at var{frequencies} argument. See the description of the Frequencies Set
+below.
+
+Optional argument @var{beta} is the ratio between non-synonymous
+substitution rate and synonymous substitution rate. Default value: 1.
@end table
@@ -795,9 +854,9 @@ has parameters @var{CodonAsynonymousFrequencies.012_T92.kappa},
@table @command
- at item Word(model=@{model name@} [,relrate1=@{1>real>0@}, ..., relrate@{n-1@}=@{1>real>0@}])
+ at item Word(model=@{model name@} [,relrate1=@{1>real>0@}, ..., relrate@{n-1@}=@{1>real>0@}, "equilibrium frequencies"])
or
- at item Word(model1=@{model name@}, model1=@{model name@}, ..., modeln=@{model name@}[, relrate1=@{1> real>0@}, ..., relrate@{n-1@}=@{1> real>0@}])
+@item Word(model1=@{model name@}, model2=@{model name@}, ..., modeln=@{model name@}[, relrate1=@{1> real>0@}, ..., relrate@{n-1@}=@{1> real>0@}, "equilibrium frequencies"])
substitution model on words. The arguments @var{model} and
@var{model@{i@}} are for descriptions of models on single sites such
@@ -830,7 +889,8 @@ model. The parameters names are @var{Word.1234_T92.kappa},
@example
alphabet=Word(letter=DNA,length=4)
-model=Word(model1=T92(), model2=T92(), model3=JC69(), model4=HKY85())
+model=Word(model1=T92(), model2=T92(), model3=JC69(), \
+ model4=HKY85())
@end example
builds a model on 4 bases words, such first and second sites follow
independent T92 models, third site follows a JC69 model, and fourth
@@ -892,35 +952,37 @@ These substitution models take as argument another substitution model, and add s
@table @command
- at item TS98(model=@{model description@}, s1=@{real>0@}, s2=@{real>0@})
+ at item TS98(model=@{model description@}, s1=@{real>0@}, s2=@{real>0@} [, "equilibrium frequencies"])
Tuffley and Steel 1998's 'covarion' model, taking a nested substitution model as argument for @var{model}.
The nested model can be any substitution model for any alphabet.
- at item G01(model=@{model description@}, rdist=@{rate distribution description@}, mu=@{real>0@})
+ at item G01(model=@{model description@}, rdist=@{rate distribution description@}, mu=@{real>0@} [, "equilibrium frequencies"])
Galtier 2001's 'covarion' model, taking a nested substitution model as argument for @var{model} and a rate distribution for parameter @var{rdist} (see below).
The nested model can be any substitution model for any alphabet.
- at item RE08(model=@{model description@}, lambda=@{real>0@}, mu=@{real>0@})
+ at item RE08(model=@{model description@}, lambda=@{real>0@}, mu=@{real>0@} [, "equilibrium frequencies"])
Rivas and Eddy 2008's substitution model with gaps, taking a nested substitution model as argument for @var{model}.
Parameter @var{lambda} is the insertion rate, while @var{mu} is the deletion rate.
@end table
- at subsubsection Mixture of models (beta feature)
+ at subsubsection Mixture of models
@table @command
-Mixed models combine any sustitution models with a priori distribution
-of parameters. Such models are still experimental and have not been
-yet fully tested. They should hence be used wih extra care!
+Mixed models combine any substitution models with a priori
+distribution of parameters. We call submodels all the models that are
+mixed in the mixture.
During the likelihood computation process, all the submodels of the
mixture are successively applied on the branches, and the mean of all
the likelihoods is computed. With nonhomogeneous reconstruction, since
a mixed model is a random variable, affecting a mixed model to a set
of branches means that all these branches are dependent, and in this
-case all the branches of the set have the same submodel at the same
-time.
+case a site follows the same submodels in all the branches of the set
+that support this mixed model. Moreover, it is possible to define
+paths that define dependencies between submodels of different
+mixtures (see below).
@item MixedModel(model=@{model description@})
Mixture model from a given @var{model} in which some parameters follow
@@ -939,7 +1001,7 @@ has parameters @var{TN93.kappa1_Gamma.alpha},
@var{TN93.theta}, @var{MixedModel.TN93.theta1},
@var{TN93.theta2}.
- at item Mixture(model1=@{model description@},..., modeln=@{model description@} [, relrate1=@{1>real>0@},..., relrate@{n-1@}=@{1>real>0@}, relproba1=@{1>real>0@}, ..., relproba@{n-1@}=@{1>real>0@}])
+ at item Mixture(model1=@{model description@},..., modeln=@{model description@} [, relrate1=@{1>real>0@},..., relrate@{n-1@}=@{1>real>0@}, relproba1=@{1>real>0@}, ..., relproba@{n-1@}=@{1>real>0@}, "equilibrium frequencies"])
Mixture model built from several @var{models}: each model has its own
probability and rate.
@@ -957,6 +1019,7 @@ has parameters at var{Mixture.relrate1}, @var{Mixture.relproba1},
@var{Mixture.2_YN98.kappa}, @var{Mixture.2_YN98.omega}.
@end table
+
@subsubsection Linking parameters
It is possible to reduce the parameter space by putting extra constraints on parameters, using for instance
@@ -1034,15 +1097,17 @@ where each alias is described as `param1->param2'. The full name of the paramete
@example
model1 = T92(theta=0.4, kappa=4)
model2 = GTR(theta=0.4, a = 1.1, b=0.4, c=0.4, d=0.25, e=0.1)
-nonhomogeneous.alias=GTR.theta_1->T92.theta_1
+nonhomogeneous.alias=GTR.theta1->T92.theta1
@end example
This option can be used to link parameters of the root frequencies if the model is non-stationary:
@example
nonhomogeneous.root_freq=Full(init=balanced)
-nonhomogeneous.alias=Full.theta->GTR.theta_1
+nonhomogeneous.alias=Full.theta1->GTR.theta1_1
@end example
+Note that this option is only available with the 'general' nonhomogeneous substitution models and will be ignored if used with "one_per_branch".
+
@end table
Finally, you may find useful the following options:
@@ -1055,23 +1120,121 @@ values, and leaf names with their id as suffix.
The use of that option will cause the program to exit just after producing the tagged tree.
@item output.parameter_names.file = @{@{path@}|none@}
-A text file listing all parameter names. This might come handy in order to specify the parameter that should not be optimized (see optimization.ignore_parameter) or aliased (see above).
-The use of that option will cause the program to exit just after producing the list file.
+A text file listing all parameter names. This might come handy in
+order to specify the parameter that should not be optimized (see
+optimization.ignore_parameter) or aliased (see above). The use of that
+option will cause the program to exit just after producing the list
+file.
@end table
+ at subsubsection Paths among non-homogeneous mixture models
+
+To define constraints for sites between submodels, we can set "paths"
+that any site must follow. For example, in the following description:
+
+ at example
+nonhomogeneous = general
+nonhomogeneous.number_of_models = 3
+
+model1=T92()
+model2=MixedModel(model=T92(kappa=Simple(values=(4,10,20),probas=(0.1,0.5,0.4))))
+model3=MixedModel(model=TN93(theta1=Simple(values=(0.1,0.5,0.9),probas=(0.3,0.2,0.5))))
+
+model1.nodes_id=0:1
+model2.nodes_id=2:3
+model3.nodes_id=4:5
+ at end example
+
+In this case, on branches 2 & 3 a site follows any submodel of model 2
+(but the same submodel on both branches), and on branches 4 & 5, a
+site follows any submodel of model 3 (the same on both branches as
+well). But there is no constraint between models 2 & 3, which means
+that a site can follow any submodel of model 2 and any submodel of
+model 3.
+
+If the user wants that a site with @var{T92.kappa=4} in model 2 has
+@var{TN93.theta1=0.5} in model 3, that a site with @var{T92.kappa=10}
+in model 2 has @var{TN93.theta1=0.9} in model 3, and that other cases
+are free (in this case it means that @var{T92.kappa=20} in model 2 is
+linked with @var{TN93.theta1=0.1} in model 3), then we can use the
+declarations:
+
+ at example
+site.number_of_paths=2
+site.path1=model2[T92.kappa_1] & model3[TN93.theta1_2]
+site.path2=model2[T92.kappa_2] & model3[TN93.theta1_3]
+ at end example
+
+The third path (for the remaining submodels) is automatically
+computed.
+
+It is possible to link mixtures of submodels. For example,
+
+ at example
+site.path1=model2[T92.kappa_1] & model3[TN93.theta1_2] & model3[TN93.theta1_3]
+ at end example
+
+means that a site that has @var{T92.kappa=4} in model2 has either
+ at var{TN93.theta1=0.5} or @var{TN93.theta1=0.9} in model3.
+
+
+Because of these constraints, the probabilities of the submodels are
+linked. In the first example, probability of @var{T92.kappa=4} in
+model 2 equals the probability of @var{TN93.theta1=0.5} in model 3.
+Since it is contradictory with the probabilities defined in models 2
+or 3, the reference probabilities are the ones of the first numbered
+mixed model, here model 2. In this case, the probabilities in model 3
+may have no use, but with the second example the probability of
+submodel T92.kappa=4 equals the sum of the probabilities of submodels
+TN93.theta1=0.5 or TN93.theta1=0.9. The relative proportion of those
+models used in the declaration of model 3 is then used. Here their
+respective probabilities are then: 0.1*0.2/ (0.2+0.5)=0.0286 and
+0.1*0.5/(0.2+0.5)=0.0714.
+
+Concerning the optimization procedure, this choice may entail the
+non-identifiability of several parameters (here the probabilities in model
+3), so the user should be careful about this.
+
+Another example in the case of mixtures of mixed models, where the
+submodels are defined by their names:
+
+ at example
+nonhomogeneous = general
+nonhomogeneous.number_of_models = 2
+
+model1=LLG08_UL2()
+model2=LLG08_UL3()
+
+site.number_of_paths=2
+site.path1=model1[LLG08_UL2.M2] & model2[LLG08_UL3.Q1]
+site.path2=model1[LLG08_UL2.M1] & model2[LLG08_UL3.Q2] & model2[LLG08_UL3.Q3]
+ at end example
+
+When nonhomogeneity option is @option{one_per_branch}, each site is
+constrained to follow the same submodel from leaves to root.
+
@subsubsection Root frequencies
-In case of nonstationary models, the ancestral frequencies are distinct parameters. If a model is assumed to be stationary,
-the ``None'' parameter value can be used, which is strictly equivalent to setting
+In case of nonstationary models, the ancestral frequencies are
+distinct parameters. If a model is assumed to be stationary, the
+``None'' parameter value can be used, which is strictly equivalent to
+setting
@command{nonhomogeneous.stationary=yes}.
-As since version 0.4.0, BppSuite uses the keyval syntax to set up root frequencies,
+When the model is a mixture model, since there is not a set of
+equilibrium frequencies, with this option the root frequencies are set
+to be the average (with the respective probabilities of the submodels)
+of the equilibrium frequencies of the submodels.
+
+Since version 0.4.0, BppSuite uses the keyval syntax to set up root
+frequencies,
@table @command
@item nonhomogeneous.root_freq=@{frequency set description@}
@end table
-The Frequencies set used can be any of the ones described below @xref{Frequencies sets}, depending on the alphabet used.
+The Frequencies set used can be any of the ones described below
+(@pxref{Frequencies sets}), depending on the alphabet used.
@subsection Frequencies sets
@@ -1132,9 +1295,53 @@ parameters names are @var{Word.1_GC.theta},
@var{Word.2_GC.theta}, @var{Word.4_Full.theta_1},
@var{Word.4_Full.theta_2}, @var{Word.4_Full.theta_3}.
+ at item Codon(frequency=@{frequency set description@})
+
+or
+
+ at item Codon(frequency1=@{frequency set description@}, frequency2=@{frequency set description@}, frequency3=@{frequency set description@})
+
+frequencies on codons computed as the product of frequencies on the
+letters, with stop codon frequencies set to zero. The arguments
+ at var{frequency} and @var{frequency@{i@}} are for descriptions of
+frequency sets on nucleotides. The alphabet must be a Codon alphabet.
+
+If the argument is @var{frequency}, the @emph{same} single site
+frequency set is used (ie the parameters are shared between all
+positions).
+
+If the arguments are @var{frequency1}, @var{frequency2},
+ at var{frequency3}, all single site frequency sets are independent. In
+that case, all single site frequency set parameters are position
+dependent.
+
+ at example
+alphabet=Codon(letter=DNA, type=Standard)
+Codon(frequency=GC())
+ at end example
+builds a frequency set on codons, such that all sites frequencies
+follow the same GC frequency set model. The parameter name is
+ at var{Codon.123_GC.theta}.
+
+ at example
+alphabet=Codon(letter=DNA, type=Standard)
+Codon(frequency1=GC(),frequency2=GC(),frequency3=Fixed())
+ at end example
+
+builds a frequency set on codons, such that first and second sites
+follow independent GC frequency sets, third site follows a Fixed
+frequency set. Then the parameters names are @var{Codon.1_GC.theta},
+ at var{Codon.2_GC.theta}.
+
+
+Predefined codon frequencies are available, with a syntax similar to
+the one used in the PAML software. See above Codon Models section.
+
@end table
-All functions accept the following arguments, that take priority over the parameter specification:
+All functions accept the following arguments, that take priority over
+the parameter specification:
+
@table @command
@item init=@{balanced,observed@}
@@ -1145,7 +1352,9 @@ If the frequencies are set from observed counts, a pseudoCount is
added to all the counts.
@item values=(@{vector<double>@})
-Explicitly set all frequencies manually. The size of the input vector should equal the number of resolved states in the alphabet, be in alphabetical order of states, and sum to one.
+Explicitly set all frequencies manually. The size of the input vector
+should equal the number of resolved states in the alphabet, be in
+alphabetical order of states, and sum to one.
@end table
@@ -1282,7 +1491,7 @@ where ``method'' can be one of
@table @command
@item None
- (no optimization is performed, initial values are kept ``as is''.
+No optimization is performed, initial values are kept ``as is''.
@item FullD(derivatives=@{Newton|Gradient@})
Full-derivatives method. Branch length derivatives are computed
@@ -1352,9 +1561,6 @@ with @command{YN98.freq_Word.1_}.
@item optimization.tolerance = @{float>0@}
The precision on the log-likelihood to reach.
- at item output.tree.file = @{@{path@}|none@}
-File path where to write the optimized tree.
-
@item output.infos = @{@{path@}|none@}
A text file containing several statistics for each site in the
alignment.
@@ -1362,9 +1568,11 @@ These statistics include the posterior rate, rate class with maximum posterior p
@end table
+The resulting tree will be written to a file specified by the general tree writing options (@ref{WritingTrees}).
+
@c ------------------------------------------------------------------------------------------------------------------
- at node WritingSequences, , Estimation, Common
+ at node WritingSequences, WritingTrees, Estimation, Common
@section Writing sequences/alignments to files
@table @command
@@ -1381,6 +1589,32 @@ In addition, most of the formats support the @command{length} argument, that spe
@c ------------------------------------------------------------------------------------------------------------------
+ at node WritingTrees, , WritingSequences, Common
+ at section Writing trees to files
+
+ at table @command
+ at item output.tree.file = @{path@}
+The phylogenetic tree file to write to.
+
+ at item output.tree.format = @{Newick|Nexus|NHX@}
+The format of the output tree file.
+
+ at end table
+
+Some programs may require that you write multiple trees to a file.
+The corresponding options are then:
+
+ at table @command
+ at item output.trees.file = @{path@}
+The file that will contain multiple trees.
+
+ at item output.trees.format = @{Newick|Nexus|NHX@}
+The format of the output tree file.
+
+ at end table
+
+ at c ------------------------------------------------------------------------------------------------------------------
+
@node Reference, , Common, Top
@chapter Bio++ Program Suite Reference
@@ -1400,6 +1634,7 @@ This section now details the specific options for each program in the Bio++ Prog
* bppphysamp:: Bio++ Phylogenetic Sampler.
* bppreroot:: Bio++ Serial Tree Re-rooting.
* bppseqman:: Bio++ Sequences Manipulation.
+* bppalnscore:: Bio++ Alignment Scoring.
* bpptreedraw:: Bio++ Tree Drawing.
@end menu
@@ -1495,7 +1730,7 @@ BppML can also optimize branch lengths with a molecular clock:
@table @command
- at item optimize.clock=@{no|global@}
+ at item optimization.clock=@{no|global@}
Tell if a molecular clock should be assumed.
Topology estimation is not possible with a clock constraint.
@@ -1583,8 +1818,8 @@ The BppAncestor program uses the common syntax introduced in the previous sectio
Specific options are:
@table @command
- at item asr.method = @{marginal@}
-That's the only option for now!
+ at item asr.method = @{none|marginal@}
+Marginal is the only option for now. If set to "none", only nodes frequencies can be output.
@item asr.probabilities = @{boolean@}
Tells if we should output the site specific probabilities in each case.
@@ -1607,6 +1842,9 @@ Alignment information log file (site specific rates, probabilities, etc).
@item output.nodes.file = @{@{path@}|none@}
Ancestral nodes information: expected frequencies of ancestral states.
+ at item output.nodes.add_extant = @{boolean@}
+Tell if leaf nodes should be added to the output file.
+
@end table
@c ------------------------------------------------------------------------------------------------------------------
@@ -1756,7 +1994,7 @@ File where to write the rerooted trees.
@c ------------------------------------------------------------------------------------------------------------------
- at node bppseqman, bpptreedraw, bppreroot, Reference
+ at node bppseqman, bppalnscore, bppreroot, Reference
@section BppSeqMan: Bio++ Sequence Manipulation
The Bio++ Sequence Manipulator convert between various file formats, and can also perform various operations on sequences.
@@ -1807,6 +2045,9 @@ Change gaps to fully unresolved characters, N for nucleotides and X for proteins
Change (partially) unresolved characters to gaps.
@item RemoveStops
+Remove all stop codons in sequences. If sequences are aligned, stop codons will be replaced by gaps.
+
+ at item RemoveColumnsWithStops
Remove all sites with at least one stop codon.
@item GetCDS
@@ -1826,6 +2067,13 @@ Keep only complete sites, ie sites without any gap. Sites with unresolved charac
It is also possible to fix a maximum proportion of gaps, see specific options.
@option{maxGapAllowed}: The maximum proportion of gaps allowed.
+ at item GetCodonPosition(position=@{1|2|3@})
+Retrieve the given positions from codon sequences (aligned or not).
+
+ at item FilterFromTree(tree.file=@{path@}, tree.format=@{chars@})
+Get a subset of sequences based on a tree file. The order of sequences in the file will reflect the tree structure. All sequences which do not have a corresponding leaf in the tree, based on the sequence name, will be removed.
+This method can therefore be used for subsetting a list of sequences, and/or rearranging them in a more convenient manner.
+
@end table
Examples of use:
@@ -1861,7 +2109,61 @@ sequence.manip=KeepComplete(maxGapAllowed=30%),GapToUnknown
@c ------------------------------------------------------------------------------------------------------------------
- at node bpptreedraw, , bppseqman, Reference
+ at node bppalnscore, bpptreedraw, bppseqman, Reference
+ at section BppAlnScore: Bio++ Alignment Scoring
+
+This program compares two alignments and computes column scores.
+Scores are output to a text file, and/or can be used to generate
+a site selection, to be output in a mase file.
+
+The two input alignments are specified using the input.sequences
+procedures (@pxref{Sequences}), with suffixes ``.test'' for the
+first one, and ``.ref'' for the second. Scores will be computed
+for each column of the ``.test'' alignment.
+
+Two scores are computed, following work by Thompson (1999):
+ at table @emph
+ at item column score (CS)
+is 1 if the column is found in the reference alignment, 0 otherwise.
+ at item sum-of-pairs score (SPS)
+is the proportion of pairs of residues which are also aligned in
+the reference alignment.
+ at end table
+
+Specific options:
+ at table @command
+
+ at item output.scores = @{path@}
+A text file where scores can be written, one row per column.
+If set to 'none', no file will be produced.
+
+ at item output.mase = @{path@}
+If not 'none', a Mase alignment will be generated, as a copy
+of the ``.test'' input alignment, with two site selections
+named CS and SPS.
+
+ at item output.sps_thresholds = @{float@}
+The threshold to use for generating the site selection based
+on SPS score. All positions with at least the threshold value
+will be included in the selection.
+
+ at item score.word_size = @{int>0@}
+If alignment is for a word alphabet (typically codons), the word
+size can be specified in order to produce a compatible site selection.
+Please note that in this case, the alignment must not be loaded with
+the word alphabet, but the corresponding letter alphabet.
+
+ at item score.phase = @{int>0|chars@}
+Either a number (1-based) stating the starting position for words,
+or the starting word. In this latter case, the first occurrence of the
+word in all sequences will be used to determine the phase.
+
+ at end table
+
+
+ at c ------------------------------------------------------------------------------------------------------------------
+
+ at node bpptreedraw, , bppalnscore, Reference
@section BppTreeDraw: Bio++ Tree Drawing
This is a simple program that outputs a tree in various vector formats.
diff --git a/doc/version.texi b/doc/version.texi
deleted file mode 100644
index 7db2c33..0000000
--- a/doc/version.texi
+++ /dev/null
@@ -1,2 +0,0 @@
- at set VERSION 0.4
- at set UPDATED 1
diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt
index a97b2d6..7bf1e2f 100644
--- a/man/CMakeLists.txt
+++ b/man/CMakeLists.txt
@@ -13,4 +13,5 @@ IF(MAN)
INSTALL(FILES bppreroot.1.gz DESTINATION share/man/man1)
INSTALL(FILES bppphysamp.1.gz DESTINATION share/man/man1)
INSTALL(FILES bpptreedraw.1.gz DESTINATION share/man/man1)
+ INSTALL(FILES bppalnscore.1.gz DESTINATION share/man/man1)
ENDIF(MAN)
diff --git a/man/bppalnscore.1.txt b/man/bppalnscore.1.txt
new file mode 100644
index 0000000..6761091
--- /dev/null
+++ b/man/bppalnscore.1.txt
@@ -0,0 +1,39 @@
+.TH BPPALNSCORE 1 LOCAL
+
+.SH NAME
+
+bppalnscore - Bio++ alignment scoring
+
+.SH SYNOPSIS
+
+.B bppalnscore [options]
+
+.SH AVAILABILITY
+
+All UNIX flavors
+
+.SH DESCRIPTION
+
+bppalnscore compares two alignments and computes corresponding column scores.
+Scores can be output to a text file, and/or used to generate a site selection
+to be output as a Mase file.
+
+.SH OPTIONS
+
+You should refer to 'info bppsuite' or to the online manual of bppsuite for a complete list of available options.
+
+.TP 5
+
+--noninteractive
+
+generates output for redirection in a file.
+
+.TP
+
+param=file
+
+reads a file for loading options
+
+.SH AUTHOR
+
+Bio++ Development Team.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bppsuite.git
More information about the debian-med-commit
mailing list