[med-svn] [r-cran-seqinr] 07/15: New upstream version 3.3-3
Andreas Tille
tille at debian.org
Sat Sep 23 06:55:19 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-seqinr.
commit b00b2c4ca5d59646ebabe45673d6e65ff25af2ea
Author: Andreas Tille <tille at debian.org>
Date: Sat Sep 23 08:24:17 2017 +0200
New upstream version 3.3-3
---
DESCRIPTION | 32 +
MD5 | 343 +++
NAMESPACE | 80 +
R/AAstat.R | 31 +
R/ClassSeq.R | 84 +
R/GC.R | 182 ++
R/PI.R | 47 +
R/R_socket.R | 27 +
R/acnucclose.R | 30 +
R/acnucopen.R | 52 +
R/al2bp.R | 9 +
R/allistranks.R | 61 +
R/amb.R | 30 +
R/as.alignment.R | 9 +
R/as.matrix.alignment.R | 8 +
R/autosocket.R | 41 +
R/baselineabif.R | 19 +
R/bma.R | 20 +
R/cai.R | 11 +
R/choosebank.R | 153 ++
R/circle.R | 10 +
R/clfcd.R | 90 +
R/clientid.R | 21 +
R/closebank.R | 25 +
R/col2alpha.R | 4 +
R/comp.R | 44 +
R/consensus.R | 42 +
R/count.R | 38 +
R/countfreelists.R | 31 +
R/countsubseqs.R | 36 +
R/db.growth.R | 89 +
R/dia.bactgensize.R | 208 ++
R/dist.alignment.R | 41 +
R/dotPlot.R | 32 +
R/draw.oriloc.R | 73 +
R/draw.rearranged.oriloc.R | 57 +
R/draw.recstat.R | 185 ++
R/extract.breakpoints.R | 99 +
R/extractseqs.R | 113 +
R/fastacc.R | 29 +
R/gb2fasta.R | 39 +
R/gbk2g2.R | 86 +
R/gbk2g2.euk.R | 153 ++
R/get.ncbi.R | 388 +++
R/getAnnot.R | 44 +
R/getFrag.R | 47 +
R/getKeyword.R | 51 +
R/getLength.R | 34 +
R/getLocation.R | 56 +
R/getName.R | 22 +
R/getSequence.R | 73 +
R/getTrans.R | 35 +
R/getType.R | 31 +
R/getlistrank.R | 47 +
R/getliststate.R | 38 +
R/gfrag.R | 42 +
R/ghelp.R | 36 +
R/isenum.R | 80 +
R/kaks.R | 81 +
R/knowndbs.R | 48 +
R/lseqinr.R | 7 +
R/modifylist.R | 90 +
R/move.R | 9 +
R/oriloc.R | 234 ++
R/parser.socket.R | 26 +
R/peakabif.R | 68 +
R/permutation.R | 44 +
R/plot.SeqAcnucWeb.R | 110 +
R/plotPanels.R | 22 +
R/plotabif.R | 54 +
R/plotladder.R | 35 +
R/pmw.R | 1 +
R/prepgetannots.R | 81 +
R/prettyseq.R | 47 +
R/query.r | 137 +
R/quitacnuc.R | 12 +
R/read.abif.R | 156 ++
R/read.alignment.R | 35 +
R/read.fasta.R | 150 ++
R/readBins.R | 36 +
R/readPanels.R | 42 +
R/readfirstrec.R | 46 +
R/readsmj.R | 91 +
R/rearranged.oriloc.R | 65 +
R/recstat.R | 65 +
R/residuecount.R | 33 +
R/reverse.align.R | 86 +
R/rho.R | 16 +
R/rot13.R | 6 +
R/s2n.R | 23 +
R/savelist.R | 71 +
R/setlistname.R | 31 +
R/splitseq.R | 15 +
R/stresc.R | 21 +
R/stutterabif.R | 109 +
R/swap.R | 9 +
R/synonymous.R | 74 +
R/tablecode.R | 147 ++
R/test.co.recstat.R | 205 ++
R/test.li.recstat.R | 180 ++
R/translate.R | 74 +
R/trimSpace.R | 11 +
R/uco.R | 121 +
R/util.R | 93 +
R/where.is.this.acc.R | 55 +
R/words.R | 7 +
R/words.pos.R | 14 +
R/write.fasta.R | 39 +
R/zscore.R | 63 +
data/AnoukResult.RData | Bin 0 -> 319 bytes
data/ECH.RData | Bin 0 -> 38356 bytes
data/EXP.RData | Bin 0 -> 1066 bytes
data/JLO.RData | Bin 0 -> 39576 bytes
data/SEQINR.UTIL.RData | Bin 0 -> 1775 bytes
data/aacost.RData | Bin 0 -> 638 bytes
data/aaindex.RData | Bin 0 -> 81845 bytes
data/caitab.RData | Bin 0 -> 941 bytes
data/chargaff.RData | Bin 0 -> 351 bytes
data/clustal.RData | Bin 0 -> 422 bytes
data/datalist | 26 +
data/dinucl.RData | Bin 0 -> 65740 bytes
data/ec999.RData | Bin 0 -> 317767 bytes
data/fasta.RData | Bin 0 -> 1028 bytes
data/gcO2.rda | Bin 0 -> 6076 bytes
data/gcT.rda | Bin 0 -> 17828 bytes
data/gs500liz.RData | Bin 0 -> 176 bytes
data/identifiler.RData | Bin 0 -> 554 bytes
data/m16j.RData | Bin 0 -> 418892 bytes
data/mase.RData | Bin 0 -> 543 bytes
data/msf.RData | Bin 0 -> 838 bytes
data/pK.RData | Bin 0 -> 322 bytes
data/phylip.RData | Bin 0 -> 276 bytes
data/prochlo.RData | Bin 0 -> 412468 bytes
data/revaligntest.RData | Bin 0 -> 435 bytes
data/sysdata.rda | Bin 0 -> 1775 bytes
data/toyaa.RData | Bin 0 -> 164 bytes
data/toycodon.RData | Bin 0 -> 195 bytes
data/waterabs.RData | Bin 0 -> 568 bytes
debian/changelog | 13 -
debian/compat | 1 -
debian/control | 26 -
debian/copyright | 27 -
debian/rules | 7 -
debian/source/format | 1 -
debian/upstream/metadata | 9 -
debian/watch | 2 -
inst/CITATION | 15 +
inst/abif/1_0000206138_C01_005.fsa | Bin 0 -> 109299 bytes
inst/abif/1_FAC321_0000205983_B02_004.fsa | Bin 0 -> 111777 bytes
inst/abif/2_0000206138_C01_005.fsa | Bin 0 -> 109387 bytes
inst/abif/2_FAC321_0000205983_B02_004.fsa | Bin 0 -> 114833 bytes
inst/abif/AmpFLSTR_Bins_v1.txt | 1 +
inst/abif/AmpFLSTR_Panels_v1.txt | 1 +
inst/abif/NGM_Bins.txt | 3689 +++++++++++++++++++++++++++
inst/abif/NGM_Pa.txt | 215 ++
inst/abif/Promega_Bins_v1.txt | 1 +
inst/abif/Promega_Panels_v1.txt | 1 +
inst/abif/Prototype_PowerPlex_EP01_Bins.txt | 1 +
inst/abif/Prototype_PowerPlex_EP01_Pa.txt | 1 +
inst/abif/samplefsa2ps.fsa | Bin 0 -> 218313 bytes
inst/sequences/Anouk.fasta | 70 +
inst/sequences/DarrenObbard.fasta | 6 +
inst/sequences/ECOUNC.fsa | 133 +
inst/sequences/UBIQUITIN.mase | 19 +
inst/sequences/ame1.gbk | 581 +++++
inst/sequences/bb.acc | 20 +
inst/sequences/bb.kwd | 20 +
inst/sequences/bb.mne | 20 +
inst/sequences/bb.sp | 20 +
inst/sequences/bordetella.fasta | 38 +
inst/sequences/bordetella.pep.aln | 22 +
inst/sequences/ct.bfa | Bin 0 -> 521283 bytes
inst/sequences/ct.fasta.gz | Bin 0 -> 325963 bytes
inst/sequences/ct.gbk.gz | Bin 0 -> 678534 bytes
inst/sequences/ct.predict | 948 +++++++
inst/sequences/ecolicgpe5.fasta | 6 +
inst/sequences/gopher.fasta | 56 +
inst/sequences/gopher.names | 8 +
inst/sequences/hannah.txt | 1784 +++++++++++++
inst/sequences/humanMito.fasta | 283 ++
inst/sequences/input.dat | 2835 ++++++++++++++++++++
inst/sequences/input.out | 112 +
inst/sequences/legacy.fasta | 30 +
inst/sequences/louse.fasta | 57 +
inst/sequences/louse.names | 8 +
inst/sequences/malM.fasta | 17 +
inst/sequences/ortho.fasta | 30 +
inst/sequences/scuco.txt | 112 +
inst/sequences/seqAA.fasta | 5 +
inst/sequences/smallAA.fasta | 2 +
inst/sequences/smallAA.fasta.gz | Bin 0 -> 85 bytes
inst/sequences/someORF.fsa | 453 ++++
inst/sequences/test.aln | 27 +
inst/sequences/test.mase | 19 +
inst/sequences/test.msf | 57 +
inst/sequences/test.phylip | 12 +
man/AAstat.Rd | 33 +
man/AnoukResult.Rd | 29 +
man/ECH.Rd | 37 +
man/EXP.Rd | 99 +
man/GC.Rd | 202 ++
man/JLO.Rd | 49 +
man/SEQINR.UTIL.Rd | 27 +
man/SeqAcnucWeb.Rd | 38 +
man/SeqFastaAA.Rd | 41 +
man/SeqFastadna.Rd | 43 +
man/SeqFrag.Rd | 37 +
man/a.Rd | 55 +
man/aaa.Rd | 53 +
man/aacost.Rd | 70 +
man/aaindex.Rd | 618 +++++
man/acnucopen.Rd | 79 +
man/al2bp.Rd | 69 +
man/alllistranks.Rd | 55 +
man/amb.Rd | 53 +
man/as.alignment.Rd | 38 +
man/as.matrix.alignment.Rd | 26 +
man/autosocket.Rd | 28 +
man/baselineabif.Rd | 36 +
man/bma.Rd | 42 +
man/c2s.Rd | 26 +
man/cai.Rd | 95 +
man/caitab.Rd | 73 +
man/chargaff.Rd | 131 +
man/choosebank.Rd | 97 +
man/circle.Rd | 45 +
man/closebank.Rd | 27 +
man/clustal.Rd | 13 +
man/col2alpha.Rd | 49 +
man/comp.Rd | 64 +
man/computePI.Rd | 41 +
man/consensus.Rd | 103 +
man/count.Rd | 95 +
man/countfreelists.Rd | 41 +
man/countsubseqs.Rd | 39 +
man/crelistfromclientdata.Rd | 93 +
man/dia.bactgensize.Rd | 83 +
man/dinucl.Rd | 65 +
man/dist.alignment.Rd | 41 +
man/dotPlot.Rd | 88 +
man/dotchart.uco.Rd | 60 +
man/draw.oriloc.Rd | 69 +
man/draw.rearranged.oriloc.Rd | 66 +
man/draw.recstat.Rd | 34 +
man/ec999.Rd | 36 +
man/extract.breakpoints.Rd | 91 +
man/extractseqs.Rd | 89 +
man/fasta.Rd | 10 +
man/fastacc.Rd | 196 ++
man/figures/aka.pdf | Bin 0 -> 17307 bytes
man/figures/chargaff.png | Bin 0 -> 79475 bytes
man/figures/gcskewmbe96.pdf | Bin 0 -> 29632 bytes
man/figures/introduction-dbg.pdf | Bin 0 -> 12554 bytes
man/figures/lncs2004.pdf | Bin 0 -> 54378 bytes
man/figures/waterabs.jpg | Bin 0 -> 59360 bytes
man/gb2fasta.Rd | 40 +
man/gbk2g2.Rd | 38 +
man/gbk2g2.euk.Rd | 38 +
man/gcO2.Rd | 21 +
man/gcT.Rd | 34 +
man/get.db.growth.Rd | 61 +
man/get.ncbi.Rd | 41 +
man/getAnnot.Rd | 88 +
man/getFrag.Rd | 59 +
man/getKeyword.Rd | 53 +
man/getLength.Rd | 58 +
man/getLocation.Rd | 54 +
man/getName.Rd | 57 +
man/getSequence.Rd | 79 +
man/getTrans.Rd | 118 +
man/getType.Rd | 30 +
man/getlistrank.Rd | 46 +
man/getliststate.Rd | 51 +
man/gfrag.Rd | 37 +
man/ghelp.Rd | 43 +
man/gs500liz.Rd | 43 +
man/identifiler.Rd | 27 +
man/isenum.Rd | 61 +
man/kaks.Rd | 101 +
man/knowndbs.Rd | 52 +
man/lseqinr.Rd | 23 +
man/m16j.Rd | 101 +
man/mase.Rd | 10 +
man/modifylist.Rd | 85 +
man/move.Rd | 43 +
man/msf.Rd | 9 +
man/n2s.Rd | 40 +
man/oriloc.Rd | 147 ++
man/pK.Rd | 88 +
man/parser.socket.Rd | 28 +
man/peakabif.Rd | 68 +
man/permutation.Rd | 67 +
man/phylip.Rd | 10 +
man/plot.SeqAcnucWeb.Rd | 35 +
man/plotPanels.Rd | 26 +
man/plotabif.Rd | 75 +
man/plotladder.Rd | 59 +
man/pmw.Rd | 80 +
man/prepgatannots.Rd | 73 +
man/prettyseq.Rd | 35 +
man/print.SeqAcnucWeb.Rd | 32 +
man/print.qaw.Rd | 30 +
man/prochlo.Rd | 119 +
man/query.Rd | 116 +
man/read.abif.Rd | 80 +
man/read.alignment.Rd | 116 +
man/read.fasta.Rd | 177 ++
man/readBins.Rd | 56 +
man/readPanels.Rd | 59 +
man/readfirstrec.Rd | 56 +
man/readsmj.Rd | 40 +
man/rearranged.oriloc.Rd | 97 +
man/recstat.Rd | 62 +
man/residuecount.Rd | 36 +
man/revaligntest.Rd | 22 +
man/reverse.align.Rd | 111 +
man/rot13.Rd | 34 +
man/s2c.Rd | 27 +
man/s2n.Rd | 65 +
man/savelist.Rd | 45 +
man/seqinr-package.Rd | 20 +
man/setlistname.Rd | 45 +
man/splitseq.Rd | 41 +
man/stresc.Rd | 27 +
man/stutterabif.Rd | 83 +
man/swap.Rd | 54 +
man/syncodons.Rd | 113 +
man/synsequence.Rd | 29 +
man/tablecode.Rd | 46 +
man/test.co.recstat.Rd | 48 +
man/test.li.recstat.Rd | 49 +
man/toyaa.Rd | 29 +
man/toycodon.Rd | 20 +
man/translate.Rd | 128 +
man/trimSpace.Rd | 74 +
man/uco.Rd | 124 +
man/ucoweight.Rd | 28 +
man/waterabs.Rd | 95 +
man/where.is.this.acc.Rd | 31 +
man/words.Rd | 49 +
man/words.pos.Rd | 49 +
man/write.fasta.Rd | 44 +
man/zscore.Rd | 106 +
src/Makevars | 2 +
src/Makevars.win | 2 +
src/alignment.c | 917 +++++++
src/alignment.h | 33 +
src/fastacc.c | 39 +
src/getzlibsock.c | 244 ++
src/kaks.c | 1125 ++++++++
src/util.c | 72 +
src/zsockr.c | 169 ++
352 files changed, 30458 insertions(+), 86 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100755
index 0000000..08633e5
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,32 @@
+Encoding: UTF-8
+Package: seqinr
+Version: 3.3-3
+Date: 2016-10-12
+Title: Biological Sequences Retrieval and Analysis
+Authors@R: c(person("Delphine", "Charif", role = "aut"),
+ person("Olivier", "Clerc", role = "ctb"),
+ person("Carolin", "Frank", role = "ctb"),
+ person(c("Jean","R."), "Lobry", role = c("aut", "cph")),
+ person("Anamaria", "Necşulea", role = "ctb"),
+ person("Leonor", "Palmeira", role = "ctb"),
+ person("Simon", "Penel", role = "cre", email = "simon.penel at univ-lyon1.fr"),
+ person("Guy", "Perrière", role = "ctb"))
+Author: Delphine Charif [aut], Olivier Clerc [ctb], Carolin Frank [ctb], Jean R. Lobry [aut, cph], Anamaria Necşulea [ctb], Leonor Palmeira [ctb], Simon Penel [cre], Guy Perrière [ctb]
+Maintainer: Simon Penel <simon.penel at univ-lyon1.fr>
+BugReports:
+ http://lists.r-forge.r-project.org/cgi-bin/mailman/listinfo/seqinr-forum
+Imports: ade4,segmented
+Depends: R (>= 2.10.0)
+Description: Exploratory data analysis and data visualization
+ for biological sequence (DNA and protein) data. Includes also
+ utilities for sequence data management under the ACNUC system.
+License: GPL (>= 2)
+SystemRequirements: zlib headers and library.
+URL: http://seqinr.r-forge.r-project.org/
+NeedsCompilation: yes
+Repository: CRAN
+Repository/R-Forge/Project: seqinr
+Repository/R-Forge/Revision: 2020
+Repository/R-Forge/DateTimeStamp: 2016-10-12 17:32:34
+Date/Publication: 2016-10-13 20:37:13
+Packaged: 2016-10-12 17:45:23 UTC; rforge
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..0d7f920
--- /dev/null
+++ b/MD5
@@ -0,0 +1,343 @@
+572ca39a8b8fae2be5f07aafed8c509d *DESCRIPTION
+f4eeeb197363233719e576484e515136 *NAMESPACE
+dd981a8bc19bea5e91c256f75ea8b5c9 *R/AAstat.R
+7175d6708d717dca65176df83a41f9bd *R/ClassSeq.R
+1e91dc3520dc9fd00e6e78da768c4b2e *R/GC.R
+633531e5d119f908e2ed61c2063515d9 *R/PI.R
+053a0187d0fb50f067967b45e2cbbcac *R/R_socket.R
+d0fddeea0d82c2ce6be501fb845e68db *R/acnucclose.R
+b899f1793668edaf00f2fc34bf3f8156 *R/acnucopen.R
+6c231aebdc9d78beed8b128ca9497b3a *R/al2bp.R
+f1c181a43cd515afc40912c23d57b48f *R/allistranks.R
+e8ca816b2688ccdff894fe7484a5219b *R/amb.R
+dcb9fdf6787b486e715eb32fcad782f9 *R/as.alignment.R
+23dd9489492a03efabb30f8b942db543 *R/as.matrix.alignment.R
+ef4ecfea9f3ba3e7fa6d4d22d3fc5c87 *R/autosocket.R
+79270da6013f8c0da557d49df4c6fb93 *R/baselineabif.R
+5703b2a2d40aff5cdb4fe98e03b1806c *R/bma.R
+e8d88ddfefb2671a94f95c53e8c22c57 *R/cai.R
+5f15ab5f63f71b752039ca1ced27a28b *R/choosebank.R
+812ffd4010c7035837d2fd0d4db290f5 *R/circle.R
+d8fab11dbb02bd793828704fb6903c51 *R/clfcd.R
+b25f52a33ccb2f6f66fcb30e1e74680e *R/clientid.R
+00bc719de0ccf65560a86ec794b760fe *R/closebank.R
+f6f6dc5c585d71ee9e664db3d9e9453a *R/col2alpha.R
+a54741d7bafe66fe670b789236cc22fa *R/comp.R
+26ff4107c8fc92f96020fc8551688aa9 *R/consensus.R
+70903c3ea1da6bcb6d2b0b77960f86a7 *R/count.R
+3d5c97c10129de81da8b0b7c8006bdf3 *R/countfreelists.R
+e354a7785d3f515d4311ddf75b54fc71 *R/countsubseqs.R
+170a8077be5e66cd08ca1fba10209d24 *R/db.growth.R
+1394355049f1bb135f481a50508994d5 *R/dia.bactgensize.R
+d47075fe840da6f000af83ece511af2c *R/dist.alignment.R
+3dc6f329eea0d8db614bfb6ded99c056 *R/dotPlot.R
+e7c8d78234ac7a398766943c2b179d91 *R/draw.oriloc.R
+c5d3818fb836288a75c41ce6dd7c19e0 *R/draw.rearranged.oriloc.R
+4f7b1e44bac0700d4d313876c1f19639 *R/draw.recstat.R
+c83bc7ce2217e6f215b18376e72b650f *R/extract.breakpoints.R
+b89dfd6ad6584d989cedc6960af214ec *R/extractseqs.R
+7cbf5e8324af34371d3917496bc23dfa *R/fastacc.R
+e74c8ba0da1fb405e080065261728f9c *R/gb2fasta.R
+2d5372f6f01f90ea00be0599d686cfbc *R/gbk2g2.R
+5c25127bfa87a9c0ae5cc0303e2d5760 *R/gbk2g2.euk.R
+56a71d800186da36f5817641353d1bb3 *R/get.ncbi.R
+97394927a67bc9051c6c2d897e2b4c8c *R/getAnnot.R
+06ee37a0f5cdfc6ac972635c90a4ca2d *R/getFrag.R
+6f45dd26e77c33f1a68b30394df40537 *R/getKeyword.R
+414a50f37334fdf5c20e32de06a44c62 *R/getLength.R
+bd6ff768d092ad6573c2b719c7d2399d *R/getLocation.R
+c4b6fc3ccb3bcdec62bdebced9263660 *R/getName.R
+0a36d73d9d155c5b7a3704ab73bee883 *R/getSequence.R
+5f3525ffdeb401675378531efa06094b *R/getTrans.R
+b582f5a822b09825f9f4dabc2e220cc4 *R/getType.R
+77de234689104f105b66bdaf4ebbd886 *R/getlistrank.R
+31a6e8b2f9594596d677b57f3906b677 *R/getliststate.R
+e2af7c88e0e7c42cbb6e6208aa6364d8 *R/gfrag.R
+1b9650fd96f5cbce29b1b79015ad99c5 *R/ghelp.R
+ade42b1d2a6034de880e6676cf1f5da5 *R/isenum.R
+22d9950d657c07d38f245a908f615150 *R/kaks.R
+106b5a8d04534e8441081bdadc23959f *R/knowndbs.R
+3ea277d7f032f1d31004cda14e46fbab *R/lseqinr.R
+6e4f1e677e5d870e8c5b3bf57fa25b91 *R/modifylist.R
+a3da681f32a58026a42e57d6a5baa03c *R/move.R
+ed4500d5696fcea0e4230f0655e7a716 *R/oriloc.R
+94847d1d09e2ea265da9f736f732487a *R/parser.socket.R
+c0fd92be7137f64874e66a0d3e3f713d *R/peakabif.R
+daadbfebc84beef3d1e3fa1b511d1d82 *R/permutation.R
+501e668ee6fcdaa032058914567191d4 *R/plot.SeqAcnucWeb.R
+2869a2eb6a2c8a3dcf8d70cdb136d6c4 *R/plotPanels.R
+8d5d7797b913d7eb039c5e2a125ff5ec *R/plotabif.R
+edc4cc62d55ced70244d2db9bb71b815 *R/plotladder.R
+ae47cf06593fc89cb8a105dfb69b6e93 *R/pmw.R
+6e740c9d557d3f42fd31ce5ba3067a9c *R/prepgetannots.R
+0e735cb30f6e370e4e494ad9dcba14d1 *R/prettyseq.R
+cb9f8a17e0bdafa1fe7b8a4584eb8ab0 *R/query.r
+126d5ade572e95511b2b66b7189807d9 *R/quitacnuc.R
+5c829cc5536f04c051d686cf1b553859 *R/read.abif.R
+568dca4f1a96d5619854a9e30ee60f58 *R/read.alignment.R
+bbed196d15ef9fb2b8d4da9d294ab5ed *R/read.fasta.R
+8a46bbfddc96af2244eb21d7fa7157b6 *R/readBins.R
+7fa1d45bb89c1229cd494c007ce94d5f *R/readPanels.R
+5d684c3f0beda26fa6e1d9bfd063680d *R/readfirstrec.R
+12b4af8afe3c4af7b65070201db7082c *R/readsmj.R
+76ecc72faa60145380275581474f98dc *R/rearranged.oriloc.R
+98f98cdd0cb535cce91bad01c47192d0 *R/recstat.R
+e27d133de947ce3880ed4e300c676a84 *R/residuecount.R
+ca1c94e97783b4a68a30df6ab1cad560 *R/reverse.align.R
+6b928f22960b6446a41f5b305555979c *R/rho.R
+8f025ccd0924af255f041a917f7a3787 *R/rot13.R
+50393c64f02c89723db803c22366329f *R/s2n.R
+be01cbe2fc2ee4b08079bc7b8b396e01 *R/savelist.R
+848e8f8bd67e29e39e1e46a026d003b4 *R/setlistname.R
+29e473837b15cb94cb62419eab7a93e5 *R/splitseq.R
+b081eb607b9782f99653e50cfb1e9d3f *R/stresc.R
+b06cb8115a0f2f568cbc8946908a52ee *R/stutterabif.R
+46ac2ee30965434e3687b259691a69cd *R/swap.R
+77ea763a58167e3de49c1ad98babcc3c *R/synonymous.R
+478b66c88728d9d18b394b4868b00f58 *R/tablecode.R
+0134ec6749f84bc8aa7596850f784316 *R/test.co.recstat.R
+ab86b9a3f2568f102986b6fb93a545bc *R/test.li.recstat.R
+a3350838cdb114e256e2441990c0dfa1 *R/translate.R
+db37533bde4cb23943363ec1649c395b *R/trimSpace.R
+30f245251279c3e71f13f4e96ef30079 *R/uco.R
+3c550b2b0e6b7f012c5884ec97a5f2b3 *R/util.R
+5c6a726318b034002ec8478005d0ec54 *R/where.is.this.acc.R
+4e4e196dab3926727e78e6af44616ebd *R/words.R
+a56eb0ff10c2eb36431885fa78e0fdb8 *R/words.pos.R
+204830bfeeef59878373d10b947e6fc2 *R/write.fasta.R
+6daeda4870b681a9def5e87b0c2a692c *R/zscore.R
+fb5ea3868c5553ee8200b6ccaa8be14e *data/AnoukResult.RData
+89c880c7792e1fd0f247a675b00eac7e *data/ECH.RData
+9cc885d5878894f15e4180c920ad27bd *data/EXP.RData
+0b616d65d5b57d1386cf6fc4ebf9ee15 *data/JLO.RData
+a5062809357a2fe2539c70140d930245 *data/SEQINR.UTIL.RData
+1c88524ef8cd3758aa248f064deae5ce *data/aacost.RData
+30ce52aad5219c69e83336b93f7a162f *data/aaindex.RData
+f68ee04f58aadf44274a164adbe00821 *data/caitab.RData
+3af9fcdfaaf9d05c69a752710834a5c5 *data/chargaff.RData
+a83b249863be4cd4318d8e88922f7186 *data/clustal.RData
+806f1a3796b1e80cb87cb5b538b2586f *data/datalist
+6d345d4fefb0514cca31ace5fbc88730 *data/dinucl.RData
+39a1ebbe4ecb0e5acd1be77c70b2b700 *data/ec999.RData
+2813531b2d6fbd35e92467f338135660 *data/fasta.RData
+a68302621e3d01c9b556ad0181a9c70f *data/gcO2.rda
+76dd231427b0f6094387b1fd881f6cc7 *data/gcT.rda
+46a8298a82862a0a4b314f2769fa45c8 *data/gs500liz.RData
+3289605c76c50413d7ec66a0e7eae1bc *data/identifiler.RData
+50245edb26f8168405f878587e41f8c2 *data/m16j.RData
+ac915f82c50f06f8b16dc57cee87fd18 *data/mase.RData
+3d22106f5b1db1c0b0932367624d6124 *data/msf.RData
+e9b35c92761a0bba2f6ed13c3809cf90 *data/pK.RData
+b5aff4e0546952e316c8adeb40e7fffe *data/phylip.RData
+1828730dd85cef8c8c39ad05af6d6a01 *data/prochlo.RData
+d42f8673a6bbfde5ed82e67309303f3c *data/revaligntest.RData
+a5062809357a2fe2539c70140d930245 *data/sysdata.rda
+ed4eef2ae0c20040b249777ae83f36f7 *data/toyaa.RData
+7e5270892f1ac2b68a267f525a0f188b *data/toycodon.RData
+f8df825ad5220bae76d3a9378ecf99b9 *data/waterabs.RData
+df797f2cdf697b330d68ac8194db7a53 *inst/CITATION
+79179e6254bc1f46eb07303d85aa084e *inst/abif/1_0000206138_C01_005.fsa
+45ad0f3fea5bf2143fc74a810a519945 *inst/abif/1_FAC321_0000205983_B02_004.fsa
+831e4bb52c0b2ab78a9246dca7e67d58 *inst/abif/2_0000206138_C01_005.fsa
+88860b1ef18d69b54837a85b916ae697 *inst/abif/2_FAC321_0000205983_B02_004.fsa
+a16fac50a201d4b38ec3204c8737df45 *inst/abif/AmpFLSTR_Bins_v1.txt
+4e0288d7d9d76672c7ff8ff0d05fe0ad *inst/abif/AmpFLSTR_Panels_v1.txt
+008e815eb4a7a94a46461d265c64ae78 *inst/abif/NGM_Bins.txt
+9f64398f12a862adadc4ddea9374e5bf *inst/abif/NGM_Pa.txt
+48281c9d741c32bfddc8ae63aefc8df0 *inst/abif/Promega_Bins_v1.txt
+13d0415163c2b968875073df7abecc06 *inst/abif/Promega_Panels_v1.txt
+118cf8e86dbab633734283bb8968aebd *inst/abif/Prototype_PowerPlex_EP01_Bins.txt
+480c7bcaed36937e62d611aaea8845e7 *inst/abif/Prototype_PowerPlex_EP01_Pa.txt
+115afc50e7118a18a18bb7078eb22e53 *inst/abif/samplefsa2ps.fsa
+a162dc1c3b4461c33c1644320e84cea5 *inst/sequences/Anouk.fasta
+b0706a24d5abe1df7af2c8038ca7bebb *inst/sequences/DarrenObbard.fasta
+76b03fc33829fb97bd5d1a04d283a6ab *inst/sequences/ECOUNC.fsa
+b3d34cd481e4f6bf843b78c5e17c079f *inst/sequences/UBIQUITIN.mase
+ca11c1c6cecbb254b394ee24bff10d17 *inst/sequences/ame1.gbk
+ce707593c6e9f43ac3f77f2276b2fc73 *inst/sequences/bb.acc
+3ef93a6e32d1929ff3ee0abdb5ac96dd *inst/sequences/bb.kwd
+8eb44c1f8c1e1b2d40bab5106ec657c3 *inst/sequences/bb.mne
+cd5b7b52fe1bd7a49107cfe7a4fa1311 *inst/sequences/bb.sp
+48ba932397d95293cf4b497773e83723 *inst/sequences/bordetella.fasta
+e3ad28db2c00b341c41a805cadf73f9a *inst/sequences/bordetella.pep.aln
+d63ad070003bd2b8bb17ab0b1b552ebc *inst/sequences/ct.bfa
+d742e6180004ba6b447d0991c58ccb4d *inst/sequences/ct.fasta.gz
+d00ceb9d08f1c583b35a12031a286ec7 *inst/sequences/ct.gbk.gz
+d14166d4b9592dedfb53124ea81795a7 *inst/sequences/ct.predict
+d0ff0477e562d8aa3c0979a4a33a2226 *inst/sequences/ecolicgpe5.fasta
+6dc7e60dac18296f9e1d719f39f03b84 *inst/sequences/gopher.fasta
+1d5bd50d5ff345e08cb37a8689303e0c *inst/sequences/gopher.names
+cf31afbaea651a30e8f2b530483830bb *inst/sequences/hannah.txt
+e03266d6ed9efa1dad9755a88e024565 *inst/sequences/humanMito.fasta
+c56d8223a10995c70a6f965e46a06b44 *inst/sequences/input.dat
+f82ee7968ac65ab690bdc1cfd10f055d *inst/sequences/input.out
+1b714fae98378e5142d7ec658127ef36 *inst/sequences/legacy.fasta
+d84b82c9cbc37133cb4d1f4e3c651878 *inst/sequences/louse.fasta
+49138a950cfee1b2ebce2f95e6c4569c *inst/sequences/louse.names
+065911f125ae3362e8f96bb2a00b8cd0 *inst/sequences/malM.fasta
+7e22ca9211af371ebdbc483acec22bb7 *inst/sequences/ortho.fasta
+a36f3e67af4df89313472eab72315b4d *inst/sequences/scuco.txt
+1e41a172ef6a1eb538aa4471da6d2ac9 *inst/sequences/seqAA.fasta
+27dd64cbdcdc43cc06b8c7cfdad2c3fb *inst/sequences/smallAA.fasta
+08ea8a3c06ab7017c0e5f8b83af8a022 *inst/sequences/smallAA.fasta.gz
+717a230459ee63b4bf752db0af85ee7d *inst/sequences/someORF.fsa
+05d2cec3375c0599b1872a0697f40b91 *inst/sequences/test.aln
+b8cf915491772d56bfd779c64156c424 *inst/sequences/test.mase
+2dc0b9b12f1f303cfb7cb6e88418718a *inst/sequences/test.msf
+e775cbe3a62d9495d643758a7a43c9ab *inst/sequences/test.phylip
+5bc8c88f5db8076d8d036c4ca91e1562 *man/AAstat.Rd
+98fa02321f2d444eb742e45c61ab0ea5 *man/AnoukResult.Rd
+8d735c4f859717374f61bd4b8094060c *man/ECH.Rd
+b477890899d95267de5249664a241350 *man/EXP.Rd
+e17700422fa641e87555e16227013fb3 *man/GC.Rd
+aeae19d233c41f0aab08ffa706febc9e *man/JLO.Rd
+4e32511953712c0ad3a6cecc9f79d434 *man/SEQINR.UTIL.Rd
+00d077cdf4651fec354a3cc4f60f026d *man/SeqAcnucWeb.Rd
+fdda8a20d94b93b4c9dd587763e8ea96 *man/SeqFastaAA.Rd
+78e79e0e28cd1c507275a270f2a877e4 *man/SeqFastadna.Rd
+77e0cd2af40b5f07142a00165561113b *man/SeqFrag.Rd
+0291681ef4c61b5f969911c02e1d9c3c *man/a.Rd
+204a173a6e1f8d01d680df4962e9297c *man/aaa.Rd
+ad14914091767386f1c7bf9abef15640 *man/aacost.Rd
+cf17c6b4556bffdbc7b2cc56bf13298b *man/aaindex.Rd
+149dacde36bf3d2b4f8cc7a94a39c1db *man/acnucopen.Rd
+4472367be41364ac125ff53890311842 *man/al2bp.Rd
+91ce2a2d56194034c02d02720077c7a1 *man/alllistranks.Rd
+44ea7c5018d8cb72a5c9221244d4ed47 *man/amb.Rd
+37cd629598f0292ec8901b1880636d4d *man/as.alignment.Rd
+84e9ef1f9c61bc160a3e4a369879bfd7 *man/as.matrix.alignment.Rd
+5b36d14adaa4dfaee6e59e6640ef722f *man/autosocket.Rd
+441038ae274e763b08fb919baff18c33 *man/baselineabif.Rd
+0066bc893874ff25f429c9fc607555ec *man/bma.Rd
+1720c015d702002313d2404bf34dd746 *man/c2s.Rd
+fd3a107f68cc6649b15a7d32e5e3745a *man/cai.Rd
+46074b10c09034f3d482fbe99217d9fb *man/caitab.Rd
+0b3e3b1533ec40155cce9dbde1a6a93f *man/chargaff.Rd
+fd9087898a7e4ce52e8c2852980f6310 *man/choosebank.Rd
+fd469bea7adfc60d7730d80af5c51db8 *man/circle.Rd
+ab61ec20db72ccfaa6cdc78e3bb04ed4 *man/closebank.Rd
+ad182b206f059b71eafd4ebf8d5404d4 *man/clustal.Rd
+06bae4306ad66fb9d4d99098468345da *man/col2alpha.Rd
+061c6f67f659389d8a8c41249f080112 *man/comp.Rd
+9112a5426f73b6d49e9adcce73b0ece0 *man/computePI.Rd
+493de05da072a143c93b18139de195ff *man/consensus.Rd
+3d65fbcb5620d562c5e02e2de950b487 *man/count.Rd
+fcf4e9b4e4f667a220340e32bf3e8394 *man/countfreelists.Rd
+d8c994f0a0bc546ae8456ad895772671 *man/countsubseqs.Rd
+3fff04eb83f10560f7632f2c77b6a84b *man/crelistfromclientdata.Rd
+1940be2cbdf8e773e8206103f55b3fbf *man/dia.bactgensize.Rd
+9463b95f58a2e0eae9c9d09f692fc607 *man/dinucl.Rd
+e116cf59ab497b198e0cf53313493b0c *man/dist.alignment.Rd
+fb1715453475d9bc554fa3803e9d1ed9 *man/dotPlot.Rd
+635d745b4d9d002b0136ed326ce302e4 *man/dotchart.uco.Rd
+7d235f8acb82c7c2cec55a7ee31b32eb *man/draw.oriloc.Rd
+47ceb185e4d32c72952960cce8137126 *man/draw.rearranged.oriloc.Rd
+cb50f090cefbd8f07526064d3760525e *man/draw.recstat.Rd
+18bcac1138a512c806a655b9795d639a *man/ec999.Rd
+908663409747a04f600903dcd9b11dd3 *man/extract.breakpoints.Rd
+8827cc3bfa003efef61b41cf9738d8d0 *man/extractseqs.Rd
+d2b9d3566289986cbded9e3ec42b349b *man/fasta.Rd
+80aa1764068499873841d12e41d2ac97 *man/fastacc.Rd
+1909e751870159d64935144c2101f313 *man/figures/aka.pdf
+31afa09d0a211d83f19765f80dae8405 *man/figures/chargaff.png
+89bf97b50f807a88db67fb0e67c6fcfd *man/figures/gcskewmbe96.pdf
+343cb1ec3c1c3e8d513b399c53021bfd *man/figures/introduction-dbg.pdf
+6d0d8de5faae0b428b68279c093418fe *man/figures/lncs2004.pdf
+3cb1e3e0bd34e5d0a66aa695d7d57e92 *man/figures/waterabs.jpg
+ebd1dd082d26b71029c3d5c478c34b47 *man/gb2fasta.Rd
+dc73697f22c67c0762ceae6cc04f4f1f *man/gbk2g2.Rd
+3013a472ea7f9eb0e83e58eccd8915b2 *man/gbk2g2.euk.Rd
+23ce44a155027f1adf77018b5000fb84 *man/gcO2.Rd
+4683c2678cd9bac7e8da85772869fa18 *man/gcT.Rd
+43e1ba4a463958d3fc30dd834cf84f51 *man/get.db.growth.Rd
+97a1bc320a34ee6919ae9d5a537d8ed2 *man/get.ncbi.Rd
+9b66028b42d96e7429140aa57655d886 *man/getAnnot.Rd
+f997c1167e5903376b1f15789c1fa5de *man/getFrag.Rd
+54545ba9f510e782a006733a3e3e8fb3 *man/getKeyword.Rd
+c6528d6372e7ac069263526d82617c5e *man/getLength.Rd
+36cd87460ede81e62b98a146498421a0 *man/getLocation.Rd
+08984b8a287f0d4a8463dafd47d0a336 *man/getName.Rd
+079d7a8f6263a46bbed6c8393c96fc47 *man/getSequence.Rd
+8e0538cbbb212da6f846fce672664651 *man/getTrans.Rd
+6c69abe312462e20291bf121926d4529 *man/getType.Rd
+dc9361185c4cc068c74f243ae3085fb5 *man/getlistrank.Rd
+a36f61d22d9b9f7157c32a08a598f5e0 *man/getliststate.Rd
+2be6d73cdd7da150f01c304ca9799d5f *man/gfrag.Rd
+dedec3250e5f737066c28cbc819f9a75 *man/ghelp.Rd
+79f3dc6846a328f749391c2333b840c5 *man/gs500liz.Rd
+9e88c23480ad91dc30ad2e5f4a452932 *man/identifiler.Rd
+193b78a2d66647ef32a26d3a6a9cb570 *man/isenum.Rd
+d1c3109d8cb096356e2a03d1e82c4693 *man/kaks.Rd
+99cc2e82b216d68651eae627f741ed68 *man/knowndbs.Rd
+a4394470a6facecb904714bfd79bbbec *man/lseqinr.Rd
+e65af94d33f2668d6be2742b85657558 *man/m16j.Rd
+0c91047619417b66e80545dd84dafb09 *man/mase.Rd
+428391fede8a9ffddbedfe9f4989b795 *man/modifylist.Rd
+4f32da53eef0d1cadb1547bf25d463cf *man/move.Rd
+d28add4f994e4b9b07aa0d2322ec555f *man/msf.Rd
+82c40f0f71a304c5778f2e5a271f18e5 *man/n2s.Rd
+35b13ee74557aa68fd8c6536b2b58f60 *man/oriloc.Rd
+333457a72f8a13ed0558924f0adfa49d *man/pK.Rd
+56afbbbf5ea360c7f53d43eba732aee8 *man/parser.socket.Rd
+723e1c897ddc1cd527e4b68e72b61d16 *man/peakabif.Rd
+22c29a51033b98732641ecabc88aa95c *man/permutation.Rd
+dc0960ab515405c7b1d8022b4c614e0e *man/phylip.Rd
+9c388b866ae60b1acce9f0c3ec999275 *man/plot.SeqAcnucWeb.Rd
+a62959a08a07e71f2206b6097702a440 *man/plotPanels.Rd
+a8725647a735b22aa4f194f6aeed868f *man/plotabif.Rd
+7d9873da33370d083c9d39329627ae56 *man/plotladder.Rd
+35f13e91f9a71ebc7767a9c572db8d7a *man/pmw.Rd
+4d333e904b9949a644c38106b09fb0b7 *man/prepgatannots.Rd
+27747750abb1b3bd78182a6ebaae7188 *man/prettyseq.Rd
+c369db698c76ac28ed5787faebb47387 *man/print.SeqAcnucWeb.Rd
+07d15e5a23f18d903a96302cfbba940f *man/print.qaw.Rd
+1e9a408d0b57e4c792e25d9a9e7f83ab *man/prochlo.Rd
+b45b01e1ed92fa048fd1d40f3587a0ec *man/query.Rd
+8f1e254ca171e28fdd2f88c9b3d4c417 *man/read.abif.Rd
+56884b5272ea697f14c2f5321bdc2707 *man/read.alignment.Rd
+5f1576f8259554c751aa3428bb8de0d4 *man/read.fasta.Rd
+a5a96ae19a122c96b13cf9d32fc15656 *man/readBins.Rd
+00603d098cc42569cb59dcbbf1b3d4a0 *man/readPanels.Rd
+b8d6f7150920037ff15a5f4eeb578fbc *man/readfirstrec.Rd
+38424b3fa194191b2e0b5edf42cc5c29 *man/readsmj.Rd
+065d854ab9770b51f0136b63963dfe98 *man/rearranged.oriloc.Rd
+a5eae3aeec62fad12827474fc0973767 *man/recstat.Rd
+d1188268a056f743d2d782ea54d68be1 *man/residuecount.Rd
+717eed2b3208488bd82c4686b52e9a05 *man/revaligntest.Rd
+c680b6edd53dec52508f3d3d3d600e76 *man/reverse.align.Rd
+389b1c787640bcb2aade946807fe85f4 *man/rot13.Rd
+c5807ec9d1898ea9fab444e85f0d0fda *man/s2c.Rd
+b877e6e8b74fada866263ac3dc64d264 *man/s2n.Rd
+917277947bfa162ccfbd7d80c1609667 *man/savelist.Rd
+c8461919f0ad8179d0662483f8080e52 *man/seqinr-package.Rd
+da3fbcd08721968829b8a86a22700784 *man/setlistname.Rd
+83c1874b1ebb8d8953e72fe682c31eca *man/splitseq.Rd
+b4404410925156b19be401751a1be19c *man/stresc.Rd
+12718c93289416af2258e39b8ed9908a *man/stutterabif.Rd
+a2e9ab7c4069802284e1e4c571cac99a *man/swap.Rd
+46ce2de359e08867220d1ad54736f0e5 *man/syncodons.Rd
+bde0f959c086fcf0f896762e19cabf76 *man/synsequence.Rd
+426d1150c8531a0674d1dfe6abf118d4 *man/tablecode.Rd
+1e334f4e5498f14856ca6b57d151c9fa *man/test.co.recstat.Rd
+60b2f5b613c3bcac7d41a8882266de9b *man/test.li.recstat.Rd
+9c43436214e29c3b5ef859416f791904 *man/toyaa.Rd
+fcf2532a41bee2ffd4f24dfb8b11ecf5 *man/toycodon.Rd
+3408955b913e3c2e35c30366d6310aaf *man/translate.Rd
+42024df78f45d2b2d5a97d3faec0767b *man/trimSpace.Rd
+8508d24d8bc2aa9d21838664b8cf87a2 *man/uco.Rd
+153d7c57d14cbe82c15201609e545007 *man/ucoweight.Rd
+ba0182b58fd7417db37c2519c05e0018 *man/waterabs.Rd
+61cee5db6407f402f1064f7d19d82855 *man/where.is.this.acc.Rd
+353a79f6d7d246604d66fd43d5ee857f *man/words.Rd
+cc29467c82eefe32980cc0834f69c2e6 *man/words.pos.Rd
+e5d224d86aaaffbb71b4644ebd1f175d *man/write.fasta.Rd
+3b1e4dce6505f1509894f7d162380649 *man/zscore.Rd
+45e4c32aff3ab2954e3835005b4c38e6 *src/Makevars
+cb407d8c24b063ecc44bb3bb49ab6216 *src/Makevars.win
+436a2de8b172239b4ccf21ad72641759 *src/alignment.c
+72d7cf9e818c492b578f390f794f10f7 *src/alignment.h
+320b43b2d16f79ed0b12b471bad1c13c *src/fastacc.c
+ffbe5a8a0eb4f0468a358e69160f8c28 *src/getzlibsock.c
+f6eb92933aec8516b6fa19a6db75747c *src/kaks.c
+3604f3611d34f9864f85ed904739cbe3 *src/util.c
+833983b499aae68695d2852d4373abe4 *src/zsockr.c
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..4fb4f9f
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,80 @@
+# Export all names
+useDynLib(seqinr, .registration = TRUE)
+exportPattern(".")
+importFrom("ade4", "dudi.coa")
+importFrom("grDevices", "col2rgb", "grey", "rainbow", "rgb")
+importFrom("graphics", "abline", "axis", "box", "dotchart", "hist",
+ "image", "legend", "lines", "mtext", "par", "plot",
+ "plot.new", "plot.window", "points", "polygon", "rect",
+ "segments", "text", "title")
+importFrom("stats", "as.dist", "complete.cases", "cor", "density",
+ "dnorm", "integrate", "kruskal.test", "lm", "nlm",
+ "optimize", "runif", "splinefun", "t.test")
+importFrom("utils", "data", "download.file", "packageDescription",
+ "read.table", "str", "tail")
+S3method(as.matrix, alignment)
+S3method(getAnnot, SeqAcnucWeb)
+S3method(getAnnot, SeqFastaAA)
+S3method(getAnnot, SeqFastadna)
+S3method(getAnnot, default)
+S3method(getAnnot, list)
+S3method(getAnnot, logical)
+S3method(getAnnot, qaw)
+S3method(getFrag, SeqAcnucWeb)
+S3method(getFrag, SeqFastaAA)
+S3method(getFrag, SeqFastadna)
+S3method(getFrag, SeqFrag)
+S3method(getFrag, character)
+S3method(getFrag, default)
+S3method(getFrag, list)
+S3method(getFrag, logical)
+S3method(getFrag, qaw)
+S3method(getKeyword, SeqAcnucWeb)
+S3method(getKeyword, default)
+S3method(getKeyword, list)
+S3method(getKeyword, logical)
+S3method(getKeyword, qaw)
+S3method(getLength, SeqAcnucWeb)
+S3method(getLength, SeqFastaAA)
+S3method(getLength, SeqFastadna)
+S3method(getLength, SeqFrag)
+S3method(getLength, character)
+S3method(getLength, default)
+S3method(getLength, list)
+S3method(getLength, logical)
+S3method(getLength, qaw)
+S3method(getLocation, SeqAcnucWeb)
+S3method(getLocation, default)
+S3method(getLocation, list)
+S3method(getLocation, logical)
+S3method(getLocation, qaw)
+S3method(getName, SeqAcnucWeb)
+S3method(getName, SeqFastaAA)
+S3method(getName, SeqFastadna)
+S3method(getName, SeqFrag)
+S3method(getName, default)
+S3method(getName, list)
+S3method(getName, logical)
+S3method(getName, qaw)
+S3method(getSequence, SeqAcnucWeb)
+S3method(getSequence, SeqFastaAA)
+S3method(getSequence, SeqFastadna)
+S3method(getSequence, SeqFrag)
+S3method(getSequence, character)
+S3method(getSequence, default)
+S3method(getSequence, list)
+S3method(getSequence, logical)
+S3method(getSequence, qaw)
+S3method(getTrans, SeqAcnucWeb)
+S3method(getTrans, SeqFastadna)
+S3method(getTrans, SeqFrag)
+S3method(getTrans, character)
+S3method(getTrans, default)
+S3method(getTrans, list)
+S3method(getTrans, logical)
+S3method(getTrans, qaw)
+S3method(plot, SeqAcnucWeb)
+S3method(print, SeqAcnucWeb)
+S3method(print, qaw)
+S3method(summary, SeqFastaAA)
+S3method(summary, SeqFastadna)
diff --git a/R/AAstat.R b/R/AAstat.R
new file mode 100644
index 0000000..66afef1
--- /dev/null
+++ b/R/AAstat.R
@@ -0,0 +1,31 @@
+data(sysdata, envir=environment())
+AAstat <- function(seq, plot = TRUE){
+  # Composition, physico-chemical class fractions and pI of a protein.
+  # seq is a protein sequence as a vector of (upper case) chars; plot = TRUE draws the class map.
+  # Returns list(Compo = residue counts, Prop = fraction of residues per class, Pi = computePI(seq)).
+  AAP <- SEQINR.UTIL$AA.PROPERTY
+  tutu <- lapply(names(AAP), function(x) which(seq %in% AAP[[x]]))
+  names(tutu) <- names(AAP)
+
+  n.items <- length(tutu) # Number of physico-chemical properties
+  n.res <- length(seq) # Number of residues in the protein
+
+  if(plot == TRUE){
+    coul <- rainbow(n.items)
+    plot(c(0, n.res), c(0, n.items + 1), type = "n", axes = FALSE, ann = FALSE, xlim = c(0, n.res + 1))
+    title(xlab = "Position of the residues along the sequence")
+    # One tick per property, centred on its band [x, x + 1], instead of ticks hard-coded for 9 properties:
+    axis(2, at = seq_len(n.items) + 0.5, labels = names(tutu), col.lab = "blue", las = 1, cex.axis = 0.8)
+    axis(1, at = seq(0, n.res, 15), labels = seq(0, n.res, 15), col.axis = "blue")
+    lapply(seq_len(n.items), function(x){
+      segments(tutu[[x]], x, tutu[[x]], x + 1, col = coul[x], lwd = 2)
+      rect(0, x, n.res, 1, lwd = 2)
+    })
+    rect(0, n.items + 1, n.res, 1, lwd = 2)
+  }
+
+  res1 <- lapply(tutu,function(x){length(x)/n.res})
+  res2 <- table(factor(seq, levels = levels(SEQINR.UTIL$CODON.AA$L)))
+  res3 <- computePI(seq)
+  return(list(Compo = res2, Prop = res1, Pi = res3))
+}
diff --git a/R/ClassSeq.R b/R/ClassSeq.R
new file mode 100644
index 0000000..0187fb7
--- /dev/null
+++ b/R/ClassSeq.R
@@ -0,0 +1,84 @@
+
+##########################################################################
+#
+# SeqFastadna:
+#
+
+as.SeqFastadna <- function(object, name = NULL, Annot = NULL){
+  # All previous attributes of object are discarded; only name and Annot are kept.
+  attributes(object) <- list(name = name, Annot = Annot)
+  `class<-`(object, "SeqFastadna")
+}
+
+is.SeqFastadna <- function(object) inherits(object, what = "SeqFastadna") # TRUE when the class vector holds SeqFastadna
+
+summary.SeqFastadna <- function(object, alphabet = s2c("acgt"), ...){
+  # Length, single-symbol counts over alphabet, and G+C content of the sequence.
+  n.bases <- getLength(object)
+  list(length = n.bases, composition = count(object, 1, alphabet = alphabet), GC = GC(object))
+}
+
+#
+# SeqFastaAA:
+#
+
+as.SeqFastaAA <- function(object, name = NULL, Annot = NULL){
+  # All previous attributes of object are discarded; only name and Annot are kept.
+  attributes(object) <- list(name = name, Annot = Annot)
+  `class<-`(object, "SeqFastaAA")
+}
+
+is.SeqFastaAA <- function(object) inherits(object, what = "SeqFastaAA") # TRUE when the class vector holds SeqFastaAA
+
+summary.SeqFastaAA <- function(object,...){
+  n.res <- getLength(object) # length of the sequence, also used to turn counts into frequencies
+  aa.counts <- table(factor(object, levels = levels(SEQINR.UTIL$CODON.AA$L)))
+  list(length = n.res, composition = aa.counts/n.res, AA.Property = AAstat(object, plot = FALSE)[[2]])
+}
+
+
+#
+# SeqAcnucWeb:
+#
+
+as.SeqAcnucWeb <- function(object, length, frame, ncbigc){
+  # Only length, frame and ncbigc, each coerced to numeric, are kept as attributes.
+  numattr <- lapply(list(length = length, frame = frame, ncbigc = ncbigc), as.numeric)
+  attributes(object) <- numattr
+  `class<-`(object, "SeqAcnucWeb")
+}
+
+is.SeqAcnucWeb <- function(object) inherits(object, what = "SeqAcnucWeb") # TRUE when the class vector holds SeqAcnucWeb
+
+
+print.SeqAcnucWeb <- function(x, ...)
+{ # Print the sequence name together with its length, frame and ncbigc attributes.
+ res <- c(x, attr(x, "length"), attr(x, "frame"), attr(x, "ncbigc"))
+ names(res) <- c("name", "length", "frame", "ncbigc") # label now spelled like the ncbigc attribute
+ print(res, ...)
+}
+
+#
+# SeqFrag:
+#
+
+
+as.SeqFrag <- function(object, begin, end, name){
+  # Record name as seqMother, plus begin and end, then tag object as SeqFrag.
+  attr(object, "seqMother") <- name
+  attr(object, "begin") <- begin
+  attr(object, "end") <- end
+  `class<-`(object, "SeqFrag")
+}
+
+is.SeqFrag <- function(object) inherits(object, what = "SeqFrag") # TRUE when the class vector holds SeqFrag
+
+#
+# Query Acnuc Web (qaw class)
+#
+
+print.qaw <- function(x, ...)
+{ # One-line display: number of elements, list type and the query stored in the call.
+ cat(x$nelem, x$type, "for", x$call$query) # was list1$call$query, an undefined global
+}
+
diff --git a/R/GC.R b/R/GC.R
new file mode 100644
index 0000000..35ec4dc
--- /dev/null
+++ b/R/GC.R
@@ -0,0 +1,182 @@
+################################
+# G+C content
+#################################
+
+GC <- function(seq, forceToLower = TRUE, exact = FALSE, NA.GC = NA, oldGC = FALSE )
+{
+ # G+C fraction of a DNA sequence given as a vector of single chars; with
+ # exact = TRUE ambiguous IUPAC symbols are apportioned from the a, c, g, t counts.
+ # NA is propagated; an empty sequence has no base to count and yields NA.GC:
+ if(length(seq) == 1 && is.na(seq)) return(NA)
+ if(length(seq) == 0) return(NA.GC)
+ # Check that sequence is a vector of chars:
+ #
+ if(nchar(seq[1]) > 1) stop("sequence is not a vector of chars")
+ #
+ # Force to lower-case letters if requested:
+ #
+ if(forceToLower) seq <- tolower(seq)
+ #
+ # Compute the count of each base:
+ #
+ nc <- sum( seq == "c" )
+ ng <- sum( seq == "g" )
+ na <- sum( seq == "a" )
+ nt <- sum( seq == "t" )
+
+ #
+ # oldGC case:
+ #
+ if(oldGC){
+ warning("argument oldGC is deprecated")
+ return( (nc + ng)/length(seq) )
+ }
+
+ #
+ # General case:
+ #
+ if(! exact){
+ if(na + nc + ng + nt == 0){
+ result <- NA.GC
+ } else {
+ result <- (nc + ng)/(na + nc + ng + nt)
+ }
+ } else {
+ #
+ # We have our first estimate of GC vs. AT base counts:
+ #
+ ngc <- ng + nc
+ nat <- na + nt
+
+ #
+ # weak and strong bases are 100% informative with respect
+ # to the GC content, we just add them:
+ #
+ # s : Strong (g or c)
+ # w : Weak (a or t)
+ #
+ ngc <- ngc + sum( seq == "s" )
+ nat <- nat + sum( seq == "w" )
+
+ ##########################
+ # Ambiguous base section #
+ ##########################
+
+ #
+ # m : Amino (a or c)
+ #
+ if(na + nc != 0){
+ nm <- sum( seq == "m")
+ ngc <- ngc + nm*nc/(na + nc)
+ nat <- nat + nm*na/(na + nc)
+ }
+
+ #
+ # k : Keto (g or t)
+ #
+ if(ng + nt != 0){
+ nk <- sum( seq == "k" )
+ ngc <- ngc + nk*ng/(ng + nt)
+ nat <- nat + nk*nt/(ng + nt)
+ }
+
+ #
+ # r : Purine (a or g)
+ #
+ if(ng + na != 0){
+ nr <- sum( seq == "r" )
+ ngc <- ngc + nr*ng/(ng + na)
+ nat <- nat + nr*na/(ng + na)
+ }
+
+ #
+ # y : Pyrimidine (c or t)
+ #
+ if(nc + nt != 0){
+ ny <- sum( seq == "y" )
+ ngc <- ngc + ny*nc/(nc + nt)
+ nat <- nat + ny*nt/(nc + nt)
+ }
+
+ #
+ # v : not t (a, c or g)
+ #
+ if(na + nc + ng != 0){
+ nv <- sum( seq == "v" )
+ ngc <- ngc + nv*(nc + ng)/(na + nc + ng)
+ nat <- nat + nv*na/(na + nc + ng)
+ }
+ #
+ # h : not g (a, c or t)
+ #
+ if(na + nc + nt != 0){
+ nh <- sum( seq == "h" )
+ ngc <- ngc + nh*nc/(na + nc + nt)
+ nat <- nat + nh*(na + nt)/(na + nc + nt)
+ }
+ #
+ # d : not c (a, g or t)
+ #
+ if(na + ng + nt != 0){
+ nd <- sum( seq == "d" )
+ ngc <- ngc + nd*ng/(na + ng + nt)
+ nat <- nat + nd*(na + nt)/(na + ng + nt)
+ }
+ #
+ # b : not a (c, g or t)
+ #
+ if(nc + ng + nt != 0){
+ nb <- sum( seq == "b" )
+ ngc <- ngc + nb*(nc + ng)/(nc + ng + nt)
+ nat <- nat + nb*nt/(nc + ng + nt)
+ }
+ #
+ # n : any (a, c, g or t) is not informative, so
+ # we compute the G+C content as:
+ #
+ if( ngc + nat == 0){
+ result <- NA.GC
+ } else {
+ result <- ngc/(ngc + nat)
+ }
+ }
+ return(result)
+}
+
+######################
+# GCpos #
+######################
+
+GCpos <- function(seq, pos, frame = 0, ...){
+  # G+C content computed on every third base, starting at position pos.
+  if(nchar(seq[1]) > 1){
+    # A string was given: its first element is split into single chars.
+    warning("sequence is not a vector of chars, I'm trying to cast it into one")
+    seq <- s2c(seq[1])
+  }
+  # Take frame into account by dropping the first frame bases:
+  if(frame != 0) seq <- seq[(1 + frame):length(seq)]
+  n.bases <- length(seq)
+  # Indices pos, pos + 3, pos + 6, ... up to the end of the sequence:
+  idx <- seq(pos, n.bases, by = 3)
+  GC(seq[idx], ...)
+}
+
+######################
+# GC1 #
+######################
+
+GC1 <- function(seq, frame = 0, ...) GCpos(seq, 1, frame, ...) # G+C content at pos = 1
+
+######################
+# GC2 #
+######################
+
+GC2 <- function(seq, frame = 0, ...) GCpos(seq, 2, frame, ...) # G+C content at pos = 2
+
+######################
+# GC3 #
+######################
+
+GC3 <- function(seq, frame = 0, ...) GCpos(seq, 3, frame, ...) # G+C content at pos = 3
+
diff --git a/R/PI.R b/R/PI.R
new file mode 100644
index 0000000..f901b70
--- /dev/null
+++ b/R/PI.R
@@ -0,0 +1,47 @@
+#
+# computePI: To Compute the Theoretical Isoelectric Point
+#
+# seq is a protein sequence as a vector of single characters in upper case,
+# note that there is no argument checking here.
+#
+
+computePI <- function(seq){
+# Returns the pH estimate found by nlm() at which the squared net charge computed below is about zero.
+# Remove stop codons translated as the character '*' :
+#
+ if(length(which(seq == "*")) != 0) seq <- seq[- which(seq == "*")]
+
+ compoAA <- table(factor(seq, levels = LETTERS)) # residue counts, one entry per capital letter
+ nTermR <- which(LETTERS == seq[1]) # index in LETTERS of the first residue
+ cTermR <- which(LETTERS == seq[length(seq)]) # index in LETTERS of the last residue
+
+ computeCharge <- function(pH, compoAA, pK, nTermResidue, cTermResidue){
+ cter <- 10^(-pK[cTermResidue,1]) / (10^(-pK[cTermResidue,1]) + 10^(-pH)) # C-terminus, column 1 of pK
+ nter <- 10^(-pH) / (10^(-pK[nTermResidue,2]) + 10^(-pH)) # N-terminus, column 2 of pK
+ carg <- as.vector(compoAA['R'] * 10^(-pH) / (10^(-pK['R',3]) + 10^(-pH))) # side chains use column 3 of pK
+ chis <- as.vector(compoAA['H'] * 10^(-pH) / (10^(-pK['H',3]) + 10^(-pH)))
+ clys <- as.vector(compoAA['K'] * 10^(-pH) / (10^(-pK['K',3]) + 10^(-pH)))
+ casp <- as.vector(compoAA['D'] * 10^(-pK['D',3]) /(10^(-pK['D',3]) + 10^(-pH)))
+ cglu <- as.vector(compoAA['E'] * 10^(-pK['E',3]) / (10^(-pK['E',3]) + 10^(-pH)))
+ ccys <- as.vector(compoAA['C'] * 10^(-pK['C',3]) / (10^(-pK['C',3]) + 10^(-pH)))
+ ctyr <- as.vector(compoAA['Y'] * 10^(-pK['Y',3]) / (10^(-pK['Y',3]) + 10^(-pH)))
+ charge <- carg + clys + chis + nter - (casp + cglu + ctyr + ccys + cter) # R, K, H, N-ter terms minus D, E, Y, C, C-ter terms
+ return(charge)
+ }
+
+ critere <- function( p1, p2, p3, p4, p5){ # squared charge, the function minimised by nlm()
+ computeCharge(pH = p1, compoAA = p2, pK = p3, nTermResidue = p4, cTermResidue = p5)^2
+ }
+
+ nlmres <- suppressWarnings(nlm(critere, 7, p2 = compoAA, p3 = SEQINR.UTIL$pk, p4 = nTermR, p5 = cTermR)) # first guess: pH 7
+ #
+ # If minimum is not zero, try with a different guess, drawn at random between pH 0 and 14:
+ #
+ while( ! identical(all.equal( nlmres$minimum, 0 ), TRUE)) # NOTE(review): no upper bound on the number of retries
+ {
+ nlmres <- suppressWarnings(nlm(critere, runif(1, 0, 14), p2 = compoAA, p3 = SEQINR.UTIL$pk, p4 = nTermR, p5 = cTermR))
+ }
+ return(nlmres$estimate)
+}
+
+
diff --git a/R/R_socket.R b/R/R_socket.R
new file mode 100644
index 0000000..0fd2bc2
--- /dev/null
+++ b/R/R_socket.R
@@ -0,0 +1,27 @@
+
+###################################################################################################
+# #
+# print.qaw #
+# #
+###################################################################################################
+
+print.qaw <- function(x, ...)
+{
+  # Multi-line display of a qaw object: its socket, call and name, then
+  # a one-row table describing the $req component.
+  cat("\n\n$socket: ")
+  print(x$socket)
+  # Bank name lookup is disabled, so nothing follows the "$banque: " label.
+  cat("\n$banque: \n$call: ")
+  print(x$call)
+  cat("$name: ")
+  print(x$name)
+  cat("\n")
+  overview <- array("", c(1, 4), list(1, c("list", "length", "mode", "content")))
+  overview[1, ] <- c("$req", length(x$req), "character", "sequences")
+  class(overview) <- "table"
+  print(overview)
+  cat("\n")
+}
+
+
diff --git a/R/acnucclose.R b/R/acnucclose.R
new file mode 100644
index 0000000..93195cb
--- /dev/null
+++ b/R/acnucclose.R
@@ -0,0 +1,30 @@
+# ==> acnucclose
+# <== code=xx
+# To close the currently opened acnuc db.
+# code : 0 if OK
+# 3 if no database was opened by the server
+
+acnucclose <- function(socket){
+  #
+  # Ask the server to close the currently opened acnuc db. Stops with
+  # an error on an empty answer or on a return code other than "0",
+  # and returns an invisible NULL otherwise.
+  #
+  writeLines("acnucclose", socket, sep = "\n")
+  reply <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(reply) == 0) stop("Empty answer from server")
+  #
+  # The first parsed field is compared to the return codes, "0" if OK:
+  #
+  code <- parser.socket(reply)[1]
+  if(code == "0") return(invisible(NULL))
+  #
+  # "3" is the only error code with a known meaning:
+  #
+  if(code == "3") stop("no database was opened by the server")
+  stop("I don't know what this error code means for acnucclose, please contact package maintener.\n")
+}
+
diff --git a/R/acnucopen.R b/R/acnucopen.R
new file mode 100644
index 0000000..4e1ce85
--- /dev/null
+++ b/R/acnucopen.R
@@ -0,0 +1,52 @@
+acnucopen <- function(db, socket, challenge = NA){
+  #
+  # Open database db on the server reached through socket and return
+  # its characteristics as a list. Once the request has been sent, the
+  # socket is closed before any error is raised.
+  #
+
+  #
+  # Check arguments:
+  #
+  if(!is.na(challenge) ) stop("password protection not implemented yet")
+  #
+  # Send request and read the one-line answer:
+  #
+  writeLines(paste("acnucopen&db=", db, sep = ""), socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    close(socket)
+    stop("Empty answer from server")
+  }
+  res <- parser.socket(answerFromServer)
+
+  #
+  # Any first field other than "0" is an error code:
+  #
+  if(res[1] != "0"){
+    close(socket)
+    dbmess <- function(what) paste("Database with name -->", db, what, sep = "")
+    errmess <- switch(as.character(res[1]),
+      "1" = "unrecognized command", # should not happen
+      "2" = "missing db= argument", # should not happen
+      "3" = dbmess("<-- is not known by server.\n"),
+      "4" = dbmess("<-- is currently off for maintenance, please try again later.\n"),
+      "5" = "A database is currently opened and has not been closed.\n",
+      "6" = dbmess("<-- is protected by a password (unimplemented).\n"),
+      paste("I don't know what this error code means for acnucopen:", res[1]))
+    stop(errmess)
+  }
+
+  #
+  # Field 2 is kept as is, fields 3 to 14 are coerced to numeric:
+  #
+  numfields <- c("totseqs", "totspecs", "totkeys", "ACC_LENGTH", "L_MNEMO", "WIDTH_KW",
+    "WIDTH_SP", "WIDTH_SMJ", "WIDTH_AUT", "WIDTH_BIB", "lrtxt", "SUBINLNG")
+  values <- lapply(res[3:14], as.numeric)
+  names(values) <- numfields
+  return(c(list(type = res[2]), values))
+}
+
diff --git a/R/al2bp.R b/R/al2bp.R
new file mode 100644
index 0000000..0f1a4fe
--- /dev/null
+++ b/R/al2bp.R
@@ -0,0 +1,9 @@
+al2bp <- function(allele.name, repeat.bp = 4, offLadderChars = "><", split = "\\."){
+  # Size in bp from an allele name: repeat.bp times the part before split, plus the part after it.
+  allele.name <- as.character(allele.name)
+  if(any(s2c(allele.name) %in% s2c(offLadderChars))) return(NA)
+  parts <- unlist(strsplit(allele.name, split = split))
+  # NA are returned for X and Y at Amelogenin locus for instance
+  bp <- repeat.bp*suppressWarnings(as.numeric(parts[1]))
+  if(length(parts) > 1) bp + as.numeric(parts[2]) else bp
+}
diff --git a/R/allistranks.R b/R/allistranks.R
new file mode 100644
index 0000000..788b4ac
--- /dev/null
+++ b/R/allistranks.R
@@ -0,0 +1,61 @@
+###################################################################################################
+# #
+# alllistranks #
+# #
+# ==> alllistranks #
+# <== count=xx&n1,n2,... #
+# Returns the count of existing lists and all their ranks separated by commas. #
+# #
+###################################################################################################
+
+alllistranks <- function(socket = autosocket(), verbose = FALSE) {
+  #
+  # Returns list(count, ranks): the count of lists existing on the
+  # server and their ranks; ranks stays NA when count is 0.
+  #
+  say <- function(...) if(verbose) cat(...)
+  #
+  # Check arguments:
+  #
+  say("I'm checking the arguments...\n")
+
+  if( !inherits(socket, "sockconn") ) stop(paste("argument socket = ", socket, "is not a socket connection."))
+  say("... and everything is OK up to now.\n")
+
+  #
+  # Send request to server:
+  #
+  say("I'm sending query to server...\n")
+  writeLines("alllistranks", socket, sep = "\n")
+  res <- readLines(socket, n = 1)
+  #
+  # An empty answer is read again until something comes back:
+  #
+  say(paste("... answer from server is:", res, "\n"))
+  if(length(res) == 0) say("... answer from server is empty!\n")
+  while(length(res) == 0){
+    say("... reading again.\n")
+    res <- readLines(socket, n = 1)
+  }
+
+  #
+  # Analysing answer from server:
+  #
+  say("I'm trying to analyse answer from server...\n")
+  if(res == "code=1") stop("Server returns an error (code=1)")
+  if(res == "count=0"){
+    say("Note: there are no list on server (count=0)\n")
+    return(list(count = 0, ranks = NA))
+  }
+
+  #
+  # Answer is count=xx&n1,n2,... : drop the leading "count=", split
+  # on "&" to get the count, then split the second field on ",":
+  #
+  fields <- unlist(strsplit(substr(x = res, start = 7, stop = nchar(res)), split = "&"))
+  count <- as.numeric(fields[1])
+  ranks <- as.numeric(unlist(strsplit(fields[2], split = ",")))
+  list(count = count, ranks = ranks)
+}
+
+alr <- alllistranks
diff --git a/R/amb.R b/R/amb.R
new file mode 100644
index 0000000..3098287
--- /dev/null
+++ b/R/amb.R
@@ -0,0 +1,30 @@
+amb <- function(base, forceToLower = TRUE, checkBase = TRUE,
+IUPAC = s2c("acgturymkswbdhvn"), u2t = TRUE){
+  #
+  # Expand one IUPAC nucleotide symbol into the bases it stands for.
+  # Called without base, returns the IUPAC alphabet itself.
+  #
+  if(missing(base)) return(IUPAC)
+  if(!is.character(base)) stop("Character expected")
+  if(nchar(base) != 1) stop("Single character expected")
+  if(forceToLower) base <- tolower(base)
+  if(checkBase && !(base %in% IUPAC)) stop("IUPAC base expected")
+  expansion <- switch(base,
+    r = "ag",   # puRine
+    y = "ct",   # pYrimidine
+    m = "ac",   # aMino
+    k = "gt",   # Keto
+    s = "cg",   # Strong (3 H bonds)
+    w = "at",   # Weak (2 H bonds)
+    b = "cgt",  # Not A (A->B)
+    d = "agt",  # Not C (C->D)
+    h = "act",  # Not G (G->H)
+    v = "acg",  # Not T (T->V)
+    n = "acgt", # aNy base
+    NULL)
+  if(!is.null(expansion)) return(s2c(expansion))
+  #
+  # Uracil in RNA instead of Thymine in DNA; any other symbol is returned as is:
+  #
+  if(base == "u" && u2t) "t" else base
+}
\ No newline at end of file
diff --git a/R/as.alignment.R b/R/as.alignment.R
new file mode 100644
index 0000000..aba73e6
--- /dev/null
+++ b/R/as.alignment.R
@@ -0,0 +1,9 @@
+#
+# Constructor for class alignment
+#
+as.alignment <- function(nb = NULL, nam = NULL, seq = NULL, com = NULL){
+  # Bundle the four components into a list of class alignment; nb is coerced to numeric.
+  fields <- list(nb = as.numeric(nb), nam = nam, seq = seq, com = com)
+  `class<-`(fields, "alignment")
+}
+
diff --git a/R/as.matrix.alignment.R b/R/as.matrix.alignment.R
new file mode 100644
index 0000000..03e5149
--- /dev/null
+++ b/R/as.matrix.alignment.R
@@ -0,0 +1,8 @@
+as.matrix.alignment <- function(x, ...){
+  # Character matrix with x$nb rows named after x$nam and one column per char of the first sequence.
+  n.sites <- nchar(x$seq[[1]])
+  chars <- sapply(x$seq, s2c)
+  res <- matrix(data = chars, nrow = x$nb, ncol = n.sites, byrow = TRUE)
+  dimnames(res) <- list(x$nam, seq_len(n.sites))
+  res
+}
diff --git a/R/autosocket.R b/R/autosocket.R
new file mode 100644
index 0000000..84398ba
--- /dev/null
+++ b/R/autosocket.R
@@ -0,0 +1,41 @@
+#
+# Returns the socket to the last opened database if any.
+#
+autosocket <- function(){
+  #
+  # Returns the socket recorded as banknameSocket in .seqinrEnv, after
+  # checking that it is an opened, readable and writable sockconn.
+  #
+  if(!exists(x = "banknameSocket", envir = .seqinrEnv)){
+    stop("banknameSocket not found, try choosebank() first")
+  }
+
+  #
+  # Check that "banknameSocket" holds a sockconn:
+  #
+  socket <- get("banknameSocket", .seqinrEnv)$socket
+  if(!inherits(socket, "sockconn")) stop("banknameSocket$socket is not a sockconn")
+  #
+  # Check the connection status as reported by summary():
+  #
+  status <- summary(socket)
+  conclass <- status[["class"]]
+  #
+  # For backward compatibility: in R 2.6.2 the result was "socket",
+  # so "socket" instead of "sockconn" is temporarily allowed with
+  # a warning.
+  #
+  if(conclass == "socket"){
+    warning("backward compatibility patch used, upgrade your R version asap")
+  } else if(conclass != "sockconn"){
+    stop("socket is not a sockconn")
+  }
+
+  if(status[["opened"]] != "opened") stop("socket is not openend")
+  if(status[["can read"]] != "yes") stop("can't read on socket")
+  if(status[["can write"]] != "yes") stop("can't write on socket")
+  #
+  # Iff everything is OK return the socket:
+  #
+  socket
+}
diff --git a/R/baselineabif.R b/R/baselineabif.R
new file mode 100644
index 0000000..f025d60
--- /dev/null
+++ b/R/baselineabif.R
@@ -0,0 +1,19 @@
+baselineabif <- function(rfu, maxrfu = 1000){
+  #
+  # Baseline of a signal: the most common value, taken as the mode of
+  # a kernel density estimate of the data below maxrfu.
+  #
+  if(!is.numeric(rfu)) stop("numerical vector expected for rfu")
+  #
+  # Data at or above the threshold maxrfu are masked with NA:
+  #
+  masked <- replace(rfu, rfu >= maxrfu, NA)
+  #
+  # Kernel density estimate of what is left:
+  #
+  dst <- density(masked, na.rm = TRUE)
+  #
+  # The baseline is where the estimated density peaks:
+  #
+  dst$x[which.max(dst$y)]
+}
diff --git a/R/bma.R b/R/bma.R
new file mode 100644
index 0000000..c8a71aa
--- /dev/null
+++ b/R/bma.R
@@ -0,0 +1,20 @@
+bma <- function(nucl, warn.non.IUPAC = TRUE, type = c("DNA", "RNA")){
+  #
+  # IUPAC symbol whose expansion is exactly the given set of
+  # nucleotides, NA when there is none.
+  #
+  if(nchar(nucl[1]) != 1) stop("vector of single chars expected")
+  type <- match.arg(type)
+  # Expand every (possibly ambiguous) input symbol with amb():
+  nucl <- unlist(sapply(tolower(nucl), amb, checkBase = FALSE))
+  # Expansion of each IUPAC symbol; u is dropped for DNA and t for RNA:
+  iupac <- sapply(amb(), amb)
+  iupac[[if(type == "DNA") "u" else "t"]] <- NULL
+  # A symbol fits when its expansion and the observed bases are the same set:
+  idx <- unlist(lapply(iupac, setequal, nucl))
+  if(any(idx)) return(names(iupac)[idx])
+  if(warn.non.IUPAC){
+    warning(paste("Undefined IUPAC symbol with:", paste(nucl, collapse = " ")))
+  }
+  NA
+}
diff --git a/R/cai.R b/R/cai.R
new file mode 100644
index 0000000..64968f0
--- /dev/null
+++ b/R/cai.R
@@ -0,0 +1,11 @@
+cai <- function(seq, w, numcode = 1, zero.threshold = 0.0001, zero.to = 0.01){
+  # exp of the codon-count weighted mean of log(w), leaving out stop codons
+  # and codons for which syncodons() returns a single codon.
+  is.stop <- "Stp" == aaa(translate(s2c(c2s(words())), numcode = numcode))
+  is.single <- sapply(syncodons(words(), numcode = numcode), length) == 1
+  exclude <- c(which(is.stop), which(is.single))
+  w <- w[-exclude]
+  w[w < zero.threshold] <- zero.to # if value is effectively zero make it zero.to
+  nncod <- uco(seq)[-exclude]
+  exp((nncod %*% log(w))/sum(nncod))
+}
diff --git a/R/choosebank.R b/R/choosebank.R
new file mode 100644
index 0000000..de0d583
--- /dev/null
+++ b/R/choosebank.R
@@ -0,0 +1,153 @@
+###########################################################################
+#
+# choosebank
+#
+# To select an ACNUC database or to get the list of available databases
+# from an ACNUC server.
+#
+###########################################################################
+
+.seqinrEnv <- new.env()
+choosebank <- function(bank = NA,
+ host = "pbil.univ-lyon1.fr",
+ port = 5558,
+ server = FALSE,
+ blocking = TRUE,
+ open = "a+",
+ encoding = "",
+ verbose = FALSE,
+ timeout = 5,
+ infobank = FALSE,
+ tagbank = NA){
+ # With bank = NA the list of banks is returned and the socket closed; otherwise the bank is opened.
+ # Print parameter values if verbose mode is on:
+ #
+ if(verbose){
+ cat("Verbose mode is on, parameter values are:\n")
+ cat(paste(" bank = ", deparse(substitute(bank)), "\n"))
+ cat(paste(" host = ", deparse(substitute(host)), "\n"))
+ cat(paste(" port = ", deparse(substitute(port)), "\n"))
+ cat(paste(" timeout = ", deparse(substitute(timeout)), "seconds \n"))
+ cat(paste(" infobank = ", deparse(substitute(infobank)), "\n"))
+ cat(paste(" tagbank = ", deparse(substitute(tagbank)), "\n"))
+ }
+
+ #
+ # Check parameter values (to be completed):
+ #
+ if( !is.na(tagbank) ){
+ if(verbose) cat("I'm checking the tagbank parameter value...\n")
+ if( !(tagbank %in% c("TEST", "TP", "DEV")) ){
+ if(verbose) cat("... and I was able to detect an error.\n")
+ stop("non allowed value for tagbank parameter.\n")
+ } else {
+ if(verbose) cat("... and everything is OK up to now.\n")
+ }
+ }
+
+ #
+ # Check that sockets are available:
+ #
+ if(verbose) cat("I'm ckecking that sockets are available on this build of R...\n")
+ if( !capabilities("sockets") ){
+ stop("Sockets are not available on this build of R.")
+ } else {
+ if(verbose) cat("... yes, sockets are available on this build of R.\n")
+ }
+
+ #
+ # Try to open socket connection:
+ #
+ if(verbose) cat("I'm trying to open the socket connection...\n")
+ oldtimeout <- getOption("timeout")
+ options(timeout = timeout) # only for the connection attempt, restored right after it
+ socket <- try( socketConnection( host = host, port = port, server = server,
+ blocking = blocking, open = open, encoding = encoding))
+ options(timeout = oldtimeout)
+ if(inherits(socket, "try-error")) {
+ errmess <- paste("I wasn't able to open the socket connection:\n",
+ " o Check that your are connected to the internet.\n",
+ " o Check that port", port, "is not closed by a firewall.\n",
+ " o Try to increase timeout value (current is", timeout, "seconds).\n")
+ stop(errmess)
+ } else {
+ if(verbose) cat("... yes, I was able to open the socket connection.\n")
+ }
+
+ #
+ # Read the answer from server:
+ #
+ if(verbose) cat("I'm trying to read answer from server...\n")
+ rep1 <- readLines(socket, n = 1)
+ if(verbose) cat(paste("... answer from server is:", rep1, "\n"))
+
+ #
+ # Send client ID to server:
+ #
+ clientid(socket = socket, verbose = verbose)
+
+ ###############################################################################
+ #
+ # If no bank name is given, return the list of available banks from server:
+ #
+ ###############################################################################
+ resdf <- kdb(tag = tagbank, socket = socket)
+ nbank <- nrow(resdf)
+
+ if( is.na(bank) ){
+ close(socket) # No more needed
+ if(verbose) cat("No bank argument was given...\n")
+ if( !infobank ){
+ if(verbose) cat("infobank parameter is FALSE, I'm just returning bank names\n")
+ return(resdf$bank)
+ } else {
+ if(verbose) cat("infobank parameter is TRUE, I'm returning all bank infos\n")
+ return(resdf)
+ }
+ } else {
+
+ ###############################################################################
+ #
+ # If a bank name is given, try to open it from server:
+ #
+ ###############################################################################
+
+ #
+ # Try to open bank from server:
+ #
+ if(verbose) cat("I'm trying to open the bank from server...\n")
+ resacnucopen <- acnucopen(bank, socket)
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Try to get information from HELP file:
+ #
+ if(verbose) cat("I'm trying to get information on the bank...\n")
+ bankhelp <- ghelp(item = "CONT", file = "HELP", socket = socket, catresult = FALSE)
+ bankrel <- bankhelp[2] # second element of the help text is kept as the release
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Try to get status info: second column of the row of resdf whose first column is bank
+ #
+ status <- "unknown"
+ for(i in seq_len(nbank)){
+ if (resdf[i,1] == bank) status <- resdf[i,2]
+ }
+
+ #
+ # Build result and assign it as banknameSocket in the environment .seqinrEnv:
+ #
+ res <- list(socket = socket,
+ bankname = bank,
+ banktype = resacnucopen$type,
+ totseqs = resacnucopen$totseqs,
+ totspecs = resacnucopen$totspecs,
+ totkeys = resacnucopen$totkeys,
+ release = bankrel,
+ status = status,
+ details = bankhelp)
+ assign("banknameSocket", res, .seqinrEnv)
+ invisible(res)
+ }
+}
diff --git a/R/circle.R b/R/circle.R
new file mode 100644
index 0000000..258b4bc
--- /dev/null
+++ b/R/circle.R
@@ -0,0 +1,10 @@
+circle <- function(x = 0, y = 0, r = 1, theta = c(0, 360), n = 100, ...){
+  # Polygon with n points on the arc from theta[1] to theta[2] (in degrees) around (x, y).
+  angle <- seq(theta[1], theta[2], length = n)*2*pi/360
+  xx <- x + r*cos(angle)
+  yy <- y + r*sin(angle)
+  # Unless theta is exactly c(0, 360), the centre is added to close the sector:
+  is.sector <- !identical(theta, c(0, 360))
+  if(is.sector){ xx <- c(xx, x); yy <- c(yy, y) }
+  polygon(xx, yy, ...)
+}
diff --git a/R/clfcd.R b/R/clfcd.R
new file mode 100644
index 0000000..3afc5d5
--- /dev/null
+++ b/R/clfcd.R
@@ -0,0 +1,90 @@
+
+crelistfromclientdata <- function(listname, file, type, socket = autosocket(), invisible = TRUE, verbose = FALSE,
+virtual = FALSE) {
+ # Sends the lines of a local file to the server to create a list, names it listname, then runs query() on it.
+ # Check arguments:
+ #
+ if(verbose) cat("I'm checking the arguments...\n")
+
+ if(!file.exists(file)) stop(paste("input file", file, "doesn't exist."))
+ if( ! type %in% c("SQ", "AC", "SP", "KW") ) stop("wrong value for type argument.")
+
+ if( !inherits(socket, "sockconn") ) stop(paste("argument socket = ", socket, "is not a socket connection."))
+ if( !is.logical(invisible) ) stop(paste("argument invisible = ", invisible, "should be TRUE or FALSE."))
+ if( is.na(invisible) ) stop(paste("argument invisible = ", invisible, "should be TRUE or FALSE."))
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Check the status of the socket connection:
+ #
+ if(verbose) cat("I'm checking the status of the socket connection...\n")
+ #
+ # This does not work: summary.connection raises an exception and we go no further
+ #
+ if(!isOpen(socket)) stop(paste("socket:", socket, "is not opened."))
+ if(!isOpen(socket, rw = "read")) stop(paste("socket:", socket, "can not read."))
+ if(!isOpen(socket, rw = "write")) stop(paste("socket:", socket, "can not write."))
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Read user file:
+ #
+ infile <- file(description = file, open = "r")
+ data <- readLines(infile)
+ close(infile)
+ nl <- length(data) # number of lines announced to the server in the request
+ #
+ # Send request to server: the request line followed by the nl data lines
+ #
+ if(verbose) cat("I'm sending query to server...\n")
+ request <- paste("crelistfromclientdata&type=", type, "&nl=", nl, sep = "")
+ if(verbose) writeLines(c(request, data))
+ writeLines(c(request, data), socket, sep = "\n")
+ res <- readLines(socket, n = 1)
+ #
+ # Check for non empty answer from server:
+ #
+ if(verbose) cat(paste("... answer from server is:", res, "\n"))
+ if(length(res) == 0){
+ if(verbose) cat("... answer from server is empty!\n")
+ while(length(res) == 0){
+ if(verbose) cat("... reading again.\n")
+ res <- readLines(socket, n = 1)
+ }
+ }
+ #
+ # Analysing answer from server:
+ #
+ if(verbose) cat("I'm trying to analyse answer from server...\n")
+ p <- parser.socket(res)
+ if(p[1] != "0"){
+ if(verbose) cat("... and I was able to detect an error.\n")
+ if( p[1] == "3" ) stop("no list creation is possible")
+ if( p[1] == "4" ) stop("EOF while reading the nl lines from client")
+ stop(paste("unknown error code from server:", p[1]))
+ }
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ if(verbose){
+ cat("listname is:", p[2], "\n")
+ cat("list rank is:", p[3], "\n")
+ cat("list count of elements is:", p[4], "\n")
+ }
+ #
+ # set ACNUC list name with user supplied parameters:
+ #
+ resstl <- setlistname(lrank = p[3], name = listname) # NOTE(review): socket is not passed on here - confirm setlistname's default
+ if(resstl != 0) stop(paste("problem with setlistname, code : ", resstl))
+ #
+ # Return the result of query() on the new list, invisibly or not depending on invisible
+ #
+ if(invisible){
+ invisible(query(listname = listname, query = listname, socket = socket, invisible = TRUE,
+ verbose = verbose, virtual = virtual))
+ } else {
+ return(query(listname = listname, query = listname, socket = socket, invisible = FALSE,
+ verbose = verbose, virtual = virtual))
+ }
+}
+
+clfcd <- crelistfromclientdata
diff --git a/R/clientid.R b/R/clientid.R
new file mode 100644
index 0000000..557841a
--- /dev/null
+++ b/R/clientid.R
@@ -0,0 +1,21 @@
+# ==> clientid&id="xxxxx"
+# <== code=0
+# Sends the server an identification of the client, typically a program name.
+
+clientid <- function(id = paste("seqinr_", packageDescription("seqinr")$Version, sep = ""), socket, verbose = FALSE){
+  #
+  # Identify this client to the server, by default as seqinr_ followed
+  # by the package version number.
+  # (internal note: log file is: /mnt/users/ADE-User/racnuc/log)
+  #
+  request <- paste("clientid&id=", id, sep = "")
+  if(verbose) cat(paste("clientid(): sending", request, "\n"))
+  writeLines( request, socket, sep = "\n")
+  answer <- readLines(socket, n = 1)
+  if(verbose) cat(paste("... answer from server is:", answer, "\n"))
+  code <- parser.socket(answer, verbose = verbose)[1]
+  if(code == "0") return(invisible(NULL))
+  print(answer)
+  stop("I don't know what this error code means for clientid, please contact package maintener.\n")
+}
+
diff --git a/R/closebank.R b/R/closebank.R
new file mode 100644
index 0000000..5af0407
--- /dev/null
+++ b/R/closebank.R
@@ -0,0 +1,25 @@
+closebank <- function(socket = autosocket(), verbose = FALSE){
+  #
+  # Close the bank: send acnucclose then quit to the server, then
+  # close the socket connection itself.
+  #
+  say <- function(...) if(verbose) cat(...)
+  ok <- "... and everything is OK up to now.\n"
+  #
+  say("I'm trying to send an acnucclose message to server...\n")
+  acnucclose(socket)
+  say(ok)
+  #
+  say("I'm trying to send a quit message to server...\n")
+  quitacnuc(socket)
+  say(ok)
+  #
+  # A failure of close.connection() is turned into an error:
+  #
+  say("I'm trying to close connection...\n")
+  res <- try(close.connection(socket))
+  failed <- inherits(res, "try-error")
+  if(failed) say("I was able to detect an error while closing connection.\n")
+  if(failed) stop("problem while closing connection.\n")
+  say(ok)
+}
diff --git a/R/col2alpha.R b/R/col2alpha.R
new file mode 100644
index 0000000..5637573
--- /dev/null
+++ b/R/col2alpha.R
@@ -0,0 +1,4 @@
+col2alpha <- function(color, alpha = 0.5){
+  chan <- col2rgb(color)[, 1] # red, green and blue of the first color only
+  rgb(chan[["red"]], chan[["green"]], chan[["blue"]], alpha = 255*alpha, maxColorValue = 255)
+}
diff --git a/R/comp.R b/R/comp.R
new file mode 100755
index 0000000..e6eb12a
--- /dev/null
+++ b/R/comp.R
@@ -0,0 +1,44 @@
+##########################################
+# complements a sequence
+###########################################
+
+comp <- function(seq, forceToLower = TRUE, ambiguous = FALSE){
+  #
+  # Complement of a nucleic acid sequence given as a vector of chars.
+  # The result is in upper case only when every input symbol is an
+  # upper case letter and forceToLower is FALSE.
+  #
+
+  isUpper <- all(seq %in% LETTERS)
+  seq <- tolower(seq)
+
+  #
+  # Plain bases: the numeric code x given by s2n() is turned into 3 - x.
+  #
+  result <- as.vector(n2s((3-s2n(seq))))
+
+  #
+  # More work is required if ambiguous bases are taken into account:
+  # each symbol below is replaced by the one it is paired with.
+  #
+
+  if(ambiguous){
+    swap <- c(b = "v", d = "h", h = "d", k = "m", m = "k", s = "s",
+              v = "b", w = "w", n = "n", y = "r", r = "y")
+    hit <- which(seq %in% names(swap))
+    result[hit] <- unname(swap[seq[hit]])
+  }
+
+  #
+  # Checking for N in the sequence, thanks to Jeremy Shearman:
+  # n is kept as n whatever the value of ambiguous.
+  #
+
+  result[which(seq == "n")] <- "n"
+
+
+  if(isUpper && !forceToLower) result <- toupper(result)
+  return(result)
+}
+
+
diff --git a/R/consensus.R b/R/consensus.R
new file mode 100644
index 0000000..1d5cf13
--- /dev/null
+++ b/R/consensus.R
@@ -0,0 +1,42 @@
+consensus <- function(matali, method = c( "majority", "threshold", "IUPAC", "profile"), threshold = 0.60,
+ warn.non.IUPAC = FALSE, type = c("DNA", "RNA")){
+ # Column-wise consensus of an alignment, given as an 'alignment' object or a character matrix.
+ if(inherits(matali, "alignment")) matali <- as.matrix(matali)
+ if(!is.matrix(matali)) stop("matrix or alignment object expected")
+ if(storage.mode(matali) != "character") stop("matrix of characters expected")
+ method <- match.arg(method)
+ # "IUPAC": every column is summarised by bma(); an unnamed vector is returned.
+ if(method == "IUPAC"){
+ type <- match.arg(type)
+ res <- apply(matali, 2, bma, warn.non.IUPAC = warn.non.IUPAC, type = type)
+ names(res) <- NULL
+ return(res)
+ }
+ # "majority": most frequent character of every column (which.max keeps the first one on ties).
+ if(method == "majority"){
+ majority <- function(x) names(which.max(table(x)))
+ res <- apply(matali, 2, majority)
+ names(res) <- NULL
+ return(res)
+ }
+ # "profile": count matrix with one row per observed character and one column per site.
+ if(method == "profile"){
+ obsvalue <- levels(factor(matali))
+ row.names(matali)<-NULL
+ res <- apply(matali, 2, function(x) table(factor(x, levels = obsvalue)))
+ # apply() drops to a vector when a single character is observed: keep a one-row matrix.
+ if(!is.matrix(res) && length(obsvalue) == 1) res <- matrix(res, nrow = 1, dimnames = list(obsvalue, colnames(matali)))
+ return(res)
+ }
+ # "threshold": majority character when its relative frequency reaches 'threshold', NA otherwise.
+ if(method == "threshold"){
+ profile <- consensus(matali, method = "profile")
+ profile.rf <- prop.table(profile, 2) # column-wise relative frequencies, matrix shape kept
+ res <- rownames(profile.rf)[apply(profile.rf, 2, which.max)]
+ res <- ifelse(apply(profile.rf, 2, max) >= threshold, res, NA)
+ names(res) <- NULL
+ return(res)
+ }
+}
+
+con <- consensus
diff --git a/R/count.R b/R/count.R
new file mode 100755
index 0000000..71ba7bf
--- /dev/null
+++ b/R/count.R
@@ -0,0 +1,38 @@
+count <- function(seq, wordsize, start = 0, by = 1, freq = FALSE, alphabet = s2c("acgt"), frame = start){
+#
+# Count the oligomers of length 'wordsize' found in 'seq' (a vector of
+# single characters), scanning from position 1 + start with step 'by'.
+# Returns a table over all possible words of 'alphabet' (relative
+# frequencies when freq is TRUE).
+#
+# 'frame' is kept for backward compatibility and overrides 'start':
+#
+ if(!missing(frame)) start <- frame
+#
+# 1-based position of the first character of every oligomer:
+#
+ first <- seq(from = 1 + start, to = length(seq), by = by)
+#
+# Every possible oligomer for the alphabet, used as factor levels so
+# that words absent from the sequence are counted as zero:
+#
+ all.words <- levels(as.factor(words(wordsize, alphabet = alphabet)))
+#
+# Build the observed oligomers one character at a time. Windows running
+# past the end of the sequence contain "NA", match no level and are
+# therefore discarded when counting:
+#
+ observed <- seq[first]
+ if (wordsize >= 2){
+ for(offset in (2:wordsize) - 1){
+ observed <- paste(observed, seq[first + offset], sep = "")
+ }
+ }
+#
+ counts <- table(factor(observed, levels = all.words))
+#
+# Turn counts into relative frequencies on request:
+#
+ if(freq == TRUE) counts <- counts/sum(counts)
+ return(counts)
+}
diff --git a/R/countfreelists.R b/R/countfreelists.R
new file mode 100644
index 0000000..b77ec13
--- /dev/null
+++ b/R/countfreelists.R
@@ -0,0 +1,31 @@
+# ==> countfreelists
+# <== code=xx&free=xx&annotlines="xx"
+# Returns the number of free lists available.
+# code: 0 iff OK
+# free: number of free lists available
+# annotlines: list of names of annotation lines in the opened database separated by |
+
+countfreelists <- function(socket = autosocket()){
+ # Ask the server and read its one-line answer:
+ writeLines("countfreelists", socket, sep = "\n")
+ reply <- readLines(socket, n = 1)
+ #
+ # No answer at all: warn and give up with NA
+ if(length(reply) == 0){
+ warning("Empty answer from server")
+ return(NA)
+ }
+ #
+ # fields[1] is the status code, "0" meaning success:
+ #
+ fields <- parser.socket(reply)
+ if(fields[1] != "0") stop(paste("error code returned by server :", fields[1]))
+ nfree <- as.numeric(fields[2])
+ #
+ # Drop the first and last character of the third field, then split on "|":
+ #
+ inner <- substr(fields[3], 2, nchar(fields[3]) - 1)
+ list(free = nfree, annotlines = unlist(strsplit(inner, split = "|", fixed = TRUE)))
+}
+
+cfl <- countfreelists
diff --git a/R/countsubseqs.R b/R/countsubseqs.R
new file mode 100644
index 0000000..934b642
--- /dev/null
+++ b/R/countsubseqs.R
@@ -0,0 +1,36 @@
+# ==> countsubseqs&lrank=xx
+# <== code=xx&count=xx
+# Returns the number of subsequences in list of rank lrank.
+# Code != 0 indicates error.
+
+countsubseqs <- function(lrank, socket = autosocket()){
+ #
+ # lrank must be a finite number:
+ #
+ if(!is.finite(lrank)) stop("wrong lrank argument")
+ #
+ # Send the request and read the one-line answer:
+ #
+ writeLines(paste("countsubseqs&lrank=", lrank, sep = ""), socket, sep = "\n")
+ reply <- readLines(socket, n = 1)
+ #
+ # No answer at all: warn and give up with NA
+ #
+ if(length(reply) == 0){
+ warning("Empty answer from server")
+ return(NA)
+ }
+ #
+ # fields[1] is the status code and fields[2] the count. Anything but
+ # "0" as status is reported as a warning (with the raw answer) and
+ # NA is returned:
+ #
+ fields <- parser.socket(reply)
+ if(fields[1] == "0"){
+ return(as.numeric(fields[2]))
+ }
+ warning(paste("error code from server:", reply))
+ return(NA)
+}
+
+css <- countsubseqs
diff --git a/R/db.growth.R b/R/db.growth.R
new file mode 100755
index 0000000..7bbc1d6
--- /dev/null
+++ b/R/db.growth.R
@@ -0,0 +1,89 @@
+get.db.growth <- function(where = "ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/relnotes.txt" )
+{
+ #
+ # Read the release notes at 'where' and return the release table as a
+ # data frame with columns Release, Month, Entries, Nucleotides and date.
+ #
+ if (!capabilities("http/ftp"))
+ stop("capabilities(\"http/ftp\") is not TRUE")
+ if (Sys.getenv("ftp_proxy") != "") {
+ warning("I'am trying to neutralize proxies")
+ Sys.setenv("no_proxy" = "")
+ }
+ #
+ # Keep what starts two lines below the "Release ... Month" header:
+ #
+ notes <- readLines( where )
+ header <- grep("Release(.+) Month", notes)
+ notes <- notes[ (header + 2):length(notes) ]
+ #
+ # Split on blanks, drop empty tokens and fold into a four-column table:
+ #
+ tokens <- sapply( strsplit( notes, split = " " ), function(x) x[nchar(x) > 0 ] )
+ growth <- data.frame( matrix(unlist(tokens), ncol = 4, byrow = TRUE) )
+ names(growth) <- c("Release", "Month", "Entries", "Nucleotides")
+ # Release, Entries and Nucleotides become numeric:
+ for( j in c(1, 3, 4) ) growth[,j] <- as.double( as.character(growth[,j]))
+ #
+ # Month is split on "/"; with parts (a, b) the numeric date is
+ # (a - 1)/12 + b:
+ #
+ date <- sapply(strsplit(as.character(growth[,2]), split="/"), function(x)
+ {
+ x <- as.double( x )
+ return( (x[1]-1)/12 + x[2] )
+ })
+ return(data.frame( cbind(growth, date) ))
+}
+
+dia.db.growth <- function( get.db.growth.out = get.db.growth(),
+ Moore = TRUE, ... )
+{
+ # Plot log10(Nucleotides) against date from the output of get.db.growth(),
+ # with the fitted regression line and, when Moore is TRUE, a family of
+ # lines doubling every 18 months. '...' is passed on to plot().
+ embl <- "ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/relnotes.txt"
+ # Graphical parameters are restored on exit, even when plotting fails:
+ op <- par(no.readonly = TRUE)
+ on.exit(par(op))
+ par( bg = "blue", fg = "yellow", col = "yellow", col.axis = "yellow",
+ col.lab = "yellow", col.main = "yellow", col.sub = "yellow" )
+
+ Nucleotides <- get.db.growth.out$Nucleotides
+ Month <- get.db.growth.out$Month
+ date <- get.db.growth.out$date
+
+ plot( date, log10(Nucleotides) , pch = 20,
+ main = paste("The exponential growth of the DDBJ/EMBL/Genbank content\n",
+ "Last update:",
+ Month[nrow(get.db.growth.out)]),
+ xlab = "Year", ylab = "Log10 number of nucleotides",
+ sub = paste("Source:", embl),
+ ... )
+ abline(lm(log10(Nucleotides)~date),col="yellow")
+ lm1 <- lm(log(Nucleotides)~date)
+ mu <- lm1$coef[2] # slope
+ dbt <- log(2)/mu # doubling time
+ dbt <- 12*dbt # in months
+
+ if( Moore )
+ {
+ x <- mean(date)
+ y <- mean(log10(Nucleotides))
+ a <- log10(2)/1.5 # slope for one doubling every 1.5 year
+ b <- y - a*x # intercept of the line through the mean point
+ # Parallel lines with that slope, shifted by steps of 0.5 on the log10 scale:
+
+ for( i in seq(-10,10,by=0.5) )
+ if( i != 0 )
+ abline( coef=c(b+i, a), col="black" )
+ legend( x = 1990, y = 7, legend= c(paste("Observed doubling time:",
+ round(dbt,1),"months"),"Moore's doubling time : 18 months"),
+ lty = c(1,1), col = c("yellow","black"))
+ }
+ else
+ {
+ legend( x = 1990, y = 7, legend=paste("Observed doubling time:",
+ round(dbt,1), "months"), lty = 1, col = "yellow")
+ }
+}
diff --git a/R/dia.bactgensize.R b/R/dia.bactgensize.R
new file mode 100644
index 0000000..d1190bb
--- /dev/null
+++ b/R/dia.bactgensize.R
@@ -0,0 +1,208 @@
+dia.bactgensize <- function(
+ fit = 2, p = 0.5, m1 = 2000, sd1 = 600, m2 = 4500, sd2 = 1000, p3 = 0.05,
+ m3 = 9000, sd3 = 1000, maxgensize = 20000,
+ source = c("ftp://pbil.univ-lyon1.fr/pub/seqinr/data/goldtable15Dec07.txt",
+ "http://www.genomesonline.org/DBs/goldtable.txt"))
+{
+# Histogram of archaeal and bacterial genome sizes read from a GOLD table. fit = 0 draws the histogram only, fit = 2 or 3 adds a mixture of normal distributions fitted by nlm() from the starting values p, m1, sd1, ... The data are returned invisibly.
+# Use local source by default (only the first element of 'source' is kept):
+#
+ source <- source[1]
+#
+# Build source of data string:
+#
+ if(source == "ftp://pbil.univ-lyon1.fr/pub/seqinr/data/goldtable15Dec07.txt"){
+ sodtxt <- "Source of data: GOLD (Genomes OnLine Database) 15 Dec 2007"
+ } else {
+ sodtxt <- paste("Source of data: GOLD (Genomes OnLine Database)",date())
+ }
+#
+# Read data from GOLD (tab-separated, with a header line):
+#
+ alldata <- read.table(source, header = TRUE, sep = "\t",
+ comment.char = "", quote = "")
+ SUPERKINGDOM <- 1 # col number
+ kingdom <- alldata[, SUPERKINGDOM]
+ prodata <- alldata[ kingdom == "Archaea" | kingdom == "Bacteria", ]
+ data <- prodata[, c("GENUS", "SPECIES", "SIZE.kb.")]
+ names(data) <- c("genus", "species", "gs")
+ data <- data[complete.cases(data), ] # rows with a missing value are dropped
+#
+# Remove data > maxgensize:
+#
+ data <- data[data$gs <= maxgensize, ]
+#
+# Use Kb scale
+#
+ sizeKb <- data$gs
+ n <- length(sizeKb)
+#
+# Graphics: x is the grid for the fitted curves, mybreaks the histogram breaks
+#
+ x <- seq( min(sizeKb), max(sizeKb), le=200)
+ mybreaks <- seq(min(sizeKb),max(sizeKb),length=15)
+ vscale <- diff(mybreaks)[1]*n # bin width times n: puts densities on the scale of the counts
+# fit == 0: histogram and kernel density estimate only
+ if(fit == 0)
+ {
+ hst <- hist(sizeKb, freq = TRUE,
+ breaks = mybreaks,
+ main=paste("Genome size distribution for", n, "bacterial genomes"),
+ xlab="Genome size [Kb]",
+ ylab="Genome count",
+ col="lightgrey")
+
+ dst <- density(sizeKb)
+ lines(x=dst$x, y=vscale*dst$y)
+ legend(x = max(sizeKb)/2, y = 0.75*max(hst$counts), lty=1,
+ "Gaussian kernel density estimation")
+
+ mtext(sodtxt)
+ }
+##########################################
+#
+# Fitting a mixture of two normal distributions
+#
+###########################################
+ if(fit == 2)
+ {
+ logvraineg <- function(param, obs) # negative log-likelihood of the two-component mixture
+ {
+ p <- param[1]
+ m1 <- param[2]
+ sd1 <- param[3]
+ m2 <- param[4]
+ sd2 <- param[5]
+
+ -sum(log(p*dnorm(obs,m1,sd1)+(1-p)*dnorm(obs,m2,sd2)))
+ }
+# Minimise it starting from the values given as arguments:
+ nlmres <- suppressWarnings(nlm(logvraineg, c(p, m1, sd1, m2, sd2), obs=sizeKb))
+ estimate <- nlmres$estimate
+# Fitted components on the scale of the counts:
+ y1 <- vscale*estimate[1]*dnorm(x, estimate[2], estimate[3])
+ y2 <- vscale*(1-estimate[1])*dnorm(x, estimate[4], estimate[5])
+ dst <- density(sizeKb)
+# The histogram is first computed without plotting, to get the y range:
+ hst <- hist(sizeKb, plot = FALSE, breaks = mybreaks)
+ ymax <- max(y1, y2, hst$counts, vscale*dst$y)
+
+
+ hist(sizeKb, freq = TRUE, ylim=c(0,ymax),
+ breaks = mybreaks,
+ main=paste("Genome size distribution for", n, "bacterial genomes"),
+ xlab="Genome size [Kb]",
+ ylab="Genome count",
+ col="lightgrey")
+
+ lines(x, y1, col="red", lwd=2)
+ lines(x, y2, col="blue", lwd=2)
+
+ text(x = max(sizeKb)/2, y = ymax, pos=4, "Maximum likelihood estimates:")
+
+ text(x = max(sizeKb)/2, y = 0.95*ymax, col="red", pos = 4, cex=1.2,
+ substitute(hat(p)[1] == e1~~hat(mu)[1] == e2~~hat(sigma)[1] == e3,
+ list(e1 = round(estimate[1],3),
+ e2 = round(estimate[2],1),
+ e3 = round(estimate[3],1))))
+
+ text(x = max(sizeKb)/2, y = 0.90*ymax, col="blue", pos = 4, cex=1.2,
+ substitute(hat(p)[2] == q~~hat(mu)[2] == e4~~hat(sigma)[2] == e5,
+ list(q = round(1 - estimate[1],3),
+ e4 = round(estimate[4],1),
+ e5 = round(estimate[5],1))))
+
+ lines(x=dst$x, y=vscale*dst$y)
+ legend(x = max(sizeKb)/2, y = 0.75*ymax, lty=1,
+ "Gaussian kernel density estimation")
+
+ mtext(sodtxt)
+ }
+##########################################
+#
+# Fitting a mixture of three normal distributions
+#
+###########################################
+ if(fit == 3)
+ {
+ logvraineg <- function(param, obs) # negative log-likelihood of the three-component mixture
+ {
+ p <- param[1]
+ m1 <- param[2]
+ sd1 <- param[3]
+ m2 <- param[4]
+ sd2 <- param[5]
+ p3 <- param[6]
+ m3 <- param[7]
+ sd3 <- param[8]
+
+ -sum(log(p*dnorm(obs,m1,sd1)
+ +(1-p-p3)*dnorm(obs,m2,sd2)
+ +p3*dnorm(obs,m3,sd3)))
+ }
+# Minimise it starting from the values given as arguments:
+ nlmres <- suppressWarnings(nlm(logvraineg, c(p, m1, sd1, m2, sd2, p3, m3, sd3), obs=sizeKb))
+ estimate <- nlmres$estimate
+# Fitted components on the scale of the counts:
+ y1 <- vscale*estimate[1]*dnorm(x, estimate[2], estimate[3])
+ y2 <- vscale*(1-estimate[1]-estimate[6])*dnorm(x, estimate[4], estimate[5])
+ y3 <- vscale*estimate[6]*dnorm(x, estimate[7], estimate[8])
+
+ hst <- hist(sizeKb, plot = FALSE, breaks = mybreaks)
+ ymax <- max(y1, y2, y3, hst$counts) # NOTE(review): unlike fit == 2, the kernel density is not part of ymax
+
+ hist(sizeKb, freq = TRUE, ylim=c(0,ymax),
+ breaks = mybreaks,
+ main=paste("Genome size distribution for", n, "bacterial genomes"),
+ xlab="Genome size [Kb]",
+ ylab="Genome count",
+ col="lightgrey")
+
+ lines(x, y1, col="red", lwd=2)
+ lines(x, y2, col="blue", lwd=2)
+ lines(x, y3, col="green3", lwd=2)
+
+ text(x = max(sizeKb)/2, y = ymax, pos=4, "Maximum likelihood estimates:")
+
+ text(x = max(sizeKb)/2, y = 0.95*ymax, col="red", pos = 4, cex=1.2,
+ substitute(hat(p)[1] == e1~~hat(mu)[1] == e2~~hat(sigma)[1] == e3,
+ list(e1 = round(estimate[1],3),
+ e2 = round(estimate[2],1),
+ e3 = round(estimate[3],1))))
+
+ text(x = max(sizeKb)/2, y = 0.90*ymax, col="blue", pos = 4, cex=1.2,
+ substitute(hat(p)[2] == q~~hat(mu)[2] == e4~~hat(sigma)[2] == e5,
+ list(q = round(1 - estimate[1]-estimate[6],3),
+ e4 = round(estimate[4],1),
+ e5 = round(estimate[5],1))))
+
+ text(x = max(sizeKb)/2, y = 0.85*ymax, col="green3", pos = 4, cex=1.2,
+ substitute(hat(p)[3] == p3~~hat(mu)[3] == e7~~hat(sigma)[3] == e8,
+ list(p3 = round(estimate[6],3),
+ e7 = round(estimate[7],1),
+ e8 = round(estimate[8],1))))
+
+ dst <- density(sizeKb)
+ lines(x=dst$x, y=vscale*dst$y)
+ legend(x = max(sizeKb)/2, y = 0.75*ymax, lty=1,
+ "Gaussian kernel density estimation")
+
+ mtext(sodtxt)
+ }
+#
+# Return invisibly the dataset (columns genus, species and gs)
+#
+ invisible(data)
+}
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/R/dist.alignment.R b/R/dist.alignment.R
new file mode 100644
index 0000000..815a7c5
--- /dev/null
+++ b/R/dist.alignment.R
@@ -0,0 +1,41 @@
+#
+# Pairwise Distances from Aligned Protein or DNA/RNA Sequences
+#
+
+dist.alignment <- function(x, matrix = c("similarity", "identity"),gap )
+{
+ #
+ # Pairwise distances between the sequences of an object of class
+ # 'alignment', returned as an object of class 'dist'.
+ #
+ # Check and match arguments:
+ #
+ if (!inherits(x, "alignment")) stop("Object of class 'alignment' expected")
+ matrix <- match.arg(matrix)
+ #
+ # Arguments for the C distance function: uppercase sequences, their
+ # number, and 1 for the "similarity" matrix or 2 for "identity".
+ #
+ sequences <- toupper(x$seq)
+ nbseq <- x$nb
+ matNumber <- if(matrix == "similarity") 1 else 2
+ #
+ # The sequence type is derived from the first sequence only: 1 when
+ # is_a_protein_seq returns at least 0.8, 0 otherwise.
+ # The following shouldn't be hard encoded, an argument for full
+ # user control should be added.
+ #
+ seqtype <- as.numeric(.Call("is_a_protein_seq", sequences[1], PACKAGE = "seqinr") >= 0.8)
+ #
+ # Call the C distance function, a missing 'gap' being passed as 0:
+ #
+ gapValue <- if (missing(gap)) 0 else gap
+ dist <- .Call("distance", sequences, nbseq, matNumber, seqtype, gapValue, PACKAGE = "seqinr")
+ #
+ # Fold the result row by row into a square matrix named after the
+ # sequences and convert it in a object of class dist:
+ #
+ mat <- matrix(dist, nbseq, nbseq, byrow = TRUE)
+ dimnames(mat) <- list(x$nam, x$nam)
+ return( as.dist(mat) )
+}
diff --git a/R/dotPlot.R b/R/dotPlot.R
new file mode 100644
index 0000000..706b700
--- /dev/null
+++ b/R/dotPlot.R
@@ -0,0 +1,32 @@
+dotPlot <- function(seq1, seq2, wsize = 1, wstep = 1, nmatch = 1, col = c("white", "black"),
+xlab = deparse(substitute(seq1)), ylab = deparse(substitute(seq2)), ...){
+ # Dot plot of two sequences given as vectors of single characters.
+ # Windows of 'wsize' characters are taken every 'wstep' positions and
+ # a pair of windows is marked when at least 'nmatch' positions agree.
+ # '...' is passed on to image().
+ if(nchar(seq1[1]) > 1) stop("seq1 should be provided as a vector of single chars")
+ if(nchar(seq2[1]) > 1) stop("seq2 should be provided as a vector of single chars")
+ if(wsize < 1) stop("non allowed value for wsize")
+ if(wstep < 1) stop("non allowed value for wstep")
+ if(nmatch < 1) stop("non allowed value for nmatch")
+ if(nmatch > wsize) stop("nmatch > wsize is not allowed")
+ #
+ # One string per window position:
+ #
+ windows <- function(chars){
+ starts <- seq(from = 1, to = length(chars) - wsize + 1, by = wstep)
+ sapply(starts, function(i) c2s(chars[i:(i + wsize - 1)]))
+ }
+ wseq1 <- windows(seq1)
+ wseq2 <- windows(seq2)
+ #
+ # Whole windows are compared when every position must match,
+ # otherwise the agreeing positions are counted:
+ #
+ atLeast <- function(x, y) colSums(sapply(x, s2c) == sapply(y, s2c)) >= nmatch
+ hits <- if( nmatch == wsize ) outer(wseq1, wseq2, "==") else outer(wseq1, wseq2, atLeast)
+ image(x = seq(from = 1, to = length(seq1), length = length(wseq1)),
+ y = seq(from = 1, to = length(seq2), length = length(wseq2)),
+ z = hits, col = col, xlab = xlab, ylab = ylab, ...)
+ box()
+}
diff --git a/R/draw.oriloc.R b/R/draw.oriloc.R
new file mode 100644
index 0000000..df739af
--- /dev/null
+++ b/R/draw.oriloc.R
@@ -0,0 +1,73 @@
+draw.oriloc <- function(ori, main = "Title",
+ xlab = "Map position in Kb",
+ ylab = "Cumulated combined skew in Kb", las = 1, las.right = 3,
+ ta.mtext = "Cumul. T-A skew", ta.col = "pink", ta.lwd = 1,
+ cg.mtext = "Cumul. C-G skew", cg.col = "lightblue", cg.lwd = 1,
+ cds.mtext = "Cumul. CDS skew", cds.col = "lightgreen", cds.lwd = 1,
+ sk.col = "black", sk.lwd = 2,
+ add.grid = TRUE, ...){
+# Plot the cumulated skews held in 'ori' (components x, y, skew, CDS.excess, start.kb and end.kb) against map position; '...' is passed on to plot().
+#
+# Get data and use Kb units for skews and map positions:
+#
+ ta <- ori$x/1000 # Cumulated T-A skew in Kb
+ cg <- ori$y/1000 # Cumulated C-G skew in Kb
+ skew <- ori$skew/1000 # Cumulated combined skew in Kb
+ cdsskew <- ori$CDS.excess # Cumulated CDS orientation bias
+ start.kb <- ori$start.kb
+ end.kb <- ori$end.kb
+#
+# This is to deal with CDS that wrap around the genome, i.e. whose
+# start and end differ by 50 or more. Only the last and the first CDS
+# are adjusted. Several CDS may be flagged at once, hence the use of
+# %in% instead of a comparison that needs a single index:
+#
+ wrapped <- which(abs(ori$start.kb - ori$end.kb) >= 50)
+ last <- length(cdsskew)
+ if(last %in% wrapped){
+ start.kb[last] <- end.kb[last] <- max(start.kb[last], end.kb[last])
+ }
+ if(1 %in% wrapped){
+ start.kb[1] <- end.kb[1] <- min(start.kb[1], end.kb[1])
+ }
+
+#
+# Use CDS midpoints as x-coordinates:
+#
+ meancoord <- (start.kb + end.kb)/2
+
+ ymin <- min(ta, cg, skew)
+ ymax <- max(ta, cg, skew)
+ xmin <- min(meancoord)
+ xmax <- max(meancoord)
+
+ ticks <- pretty(cdsskew)
+# The CDS skew and its ticks are rescaled linearly onto [ymin, ymax] to share the plot with the other curves:
+ ticks.y <- (ymax-ymin)/(max(cdsskew) - min(cdsskew))*(ticks - min(cdsskew)) + ymin
+ cds.y <- (ymax-ymin)/(max(cdsskew) - min(cdsskew))*(cdsskew - min(cdsskew)) + ymin
+
+ plot(meancoord, cg, type="l", xlab = xlab,
+ ylab = ylab, xlim = c(xmin, xmax), ylim = c(ymin, ymax),
+ cex.lab = 1.35, col = cg.col, main = main, lwd = cg.lwd, las = las, ...)
+# Right-hand axis labelled in the original CDS skew units:
+ axis(side = 4, at = ticks.y, labels = ticks, col = cds.col, las = las.right)
+
+#
+# Add vertical grid when required (at pretty positions and halfway between them):
+#
+ if(add.grid){
+ tmp <- pretty(meancoord)
+ abline(v = tmp, col = "grey", lty=3)
+ tmp <- tmp[-length(tmp)] + diff(tmp)/2
+ abline(v = tmp, col = "grey", lty=3)
+ }
+
+ lines(meancoord, ta, col = ta.col, lwd = ta.lwd)
+ lines(meancoord, skew, col = sk.col, lwd = sk.lwd)
+ lines(meancoord, cds.y, col = cds.col, lwd = cds.lwd)
+
+ mtext(ta.mtext, col = ta.col, adj = 0)
+ mtext(cg.mtext, col = cg.col)
+ mtext(cds.mtext, col = cds.col, adj=1)
+
+}
diff --git a/R/draw.rearranged.oriloc.R b/R/draw.rearranged.oriloc.R
new file mode 100644
index 0000000..3d66b42
--- /dev/null
+++ b/R/draw.rearranged.oriloc.R
@@ -0,0 +1,57 @@
+draw.rearranged.oriloc <- function(rearr.ori,breaks.gcfw=NA,breaks.gcrev=NA,breaks.atfw=NA,breaks.atrev=NA){
+ # Plot the cumulated AT, GC and CDS skews of rearr.ori; each breaks.* argument without NA is used as indices marked by short vertical ticks.
+ rearat=cumsum(rearr.ori$atskew.rear)
+ reargc=cumsum(rearr.ori$gcskew.rear)
+ strand.rear=rearr.ori$strand.rear
+ cds.rear=rep(1,length(strand.rear))
+ cds.rear[strand.rear=="reverse"]=-1
+ rearcds=cumsum(cds.rear)
+ meancoord.rear=rearr.ori$meancoord.rear
+ # Axis limits: y from the two nucleotide skews, x from the map positions
+ ymin <- min(rearat,reargc)
+ ymax <- max(rearat,reargc)
+ xmin <- min(meancoord.rear)
+ xmax <- max(meancoord.rear)
+
+ ticksrear <- pretty(rearcds)
+ # The CDS skew and its ticks are rescaled linearly onto [ymin, ymax]:
+ ticks.yrear <- (ymax-ymin)/(max(rearcds)-min(rearcds))*(ticksrear - min(rearcds)) + ymin
+ cds.yrear <- (ymax-ymin)/(max(rearcds)-min(rearcds))*(rearcds - min(rearcds)) + ymin
+
+ plot(meancoord.rear,reargc, type="l", xlab="Map position (gene index)",
+ ylab = "Cumulated normalized skew",xlim=c(xmin,xmax),ylim=c(ymin,ymax),cex.lab=1.35,col="blue",main="Rearranged nucleotide skews",lwd=2,cex.main=1.4)
+ # Vertical line at the number of forward-encoded genes:
+ abline(v=sum(strand.rear=="forward"),col="black",lwd=2)
+
+ axis(side = 4, at = ticks.yrear, labels = ticksrear, col = "black", col.axis ="black")
+ # Vertical grid at pretty positions and halfway between them:
+ tmp <- pretty(meancoord.rear)
+ abline(v=tmp, col="grey", lty=3,lwd=1.5)
+ tmp <- tmp[-length(tmp)] + diff(tmp)/2
+ abline(v=tmp, col="grey", lty=3,lwd=1.5)
+
+ lines(meancoord.rear,rearat, col="red",lwd=2)
+
+ lines(meancoord.rear,cds.yrear, col="black",lwd=2)
+
+ mtext("Cumul. A-T skew", col="red", adj=0,cex=1.1)
+ mtext("Cumul. G-C skew", col="blue",cex=1.1)
+ mtext("Cumul. CDS skew", col="black", adj=1,cex=1.1)
+ # Breakpoint ticks: centred on the curve, one 30th of the y range on each side
+ if(sum(is.na(breaks.gcfw))==0){
+ segments(x0=meancoord.rear[breaks.gcfw],y0=reargc[breaks.gcfw]-(ymax-ymin)/30,x1=meancoord.rear[breaks.gcfw],y1=reargc[breaks.gcfw]+(ymax-ymin)/30,col="blue",lwd=2,lty=1)
+ }
+
+ if(sum(is.na(breaks.gcrev))==0){
+ segments(x0=meancoord.rear[breaks.gcrev],y0=reargc[breaks.gcrev]-(ymax-ymin)/30,x1=meancoord.rear[breaks.gcrev],y1=reargc[breaks.gcrev]+(ymax-ymin)/30,col="blue",lwd=2,lty=1)
+ }
+
+ if(sum(is.na(breaks.atfw))==0){
+ segments(x0=meancoord.rear[breaks.atfw],y0=rearat[breaks.atfw]-(ymax-ymin)/30,x1=meancoord.rear[breaks.atfw],y1=rearat[breaks.atfw]+(ymax-ymin)/30,col="red",lwd=2,lty=1)
+ }
+ # The upper end must be indexed by breaks.atrev as well (it was indexed by breaks.atfw):
+ if(sum(is.na(breaks.atrev))==0){
+ segments(x0=meancoord.rear[breaks.atrev],y0=rearat[breaks.atrev]-(ymax-ymin)/30,x1=meancoord.rear[breaks.atrev],y1=rearat[breaks.atrev]+(ymax-ymin)/30,col="red",lwd=2,lty=1)
+ }
+
+}
diff --git a/R/draw.recstat.R b/R/draw.recstat.R
new file mode 100755
index 0000000..72708e2
--- /dev/null
+++ b/R/draw.recstat.R
@@ -0,0 +1,185 @@
+##
+# This function draws two graphics, one of the CA of a DNA sequence, and one of
+# start/stop codons positions in the three reading frames. This for the direct or
+# the reverse strand.
+##
+#v.18.08.2011
+draw.recstat <- function(rec, fac = 1, direct = TRUE, xlim = c(1, seqsize), col = c("red", "blue", "purple"))
+{ # two stacked plots for one strand: factor scores of the three reading frames, then start/stop codon positions
+ if (fac < 1 | 4 < fac)
+ { # test if factor is between 1 and 4 (fac = 0 used to slip through and select no column)
+ print("Factor number is not in 1:4.")
+ return()
+ }
+ seq <- rec[[1]] # recovery of elements of list rec
+ sizewin <- rec[[2]]
+ shift <- rec[[3]]
+ seqsize <- rec[[4]]
+ seqname <- rec[[5]]
+ vstopd <- rec[[8]]
+ vstopr <- rec[[9]]
+ vinitd <- rec[[10]]
+ vinitr <- rec[[11]]
+ recd <- rec[[14]]
+ recr <- rec[[15]]
+ if (xlim[1] < 1 | xlim[1] > seqsize)
+ { # lower limit outside the sequence: reset to 1
+ xlim <- c(1, xlim[2])
+ }
+ if (seqsize < xlim[2] | 1 > xlim[2])
+ { # upper limit outside the sequence: reset to the sequence length
+ xlim <- c(xlim[1], seqsize)
+ }
+ par(mfrow = c(2, 1), mar = c(0, 4, 4, 2) + 0.1) # division of the window for a closer between plots
+ par(xaxs = "i")
+ seqisize <- floor((dim(recd$li)[1])/3) # number of window by reading frame, we take the integer part
+ if ((dim(recd$li)[1])%%3 == 1) # adaptation of number of window between each reading frame
+ {
+ seqisize1 <- seqisize + 1 # for fr1
+ seqisize2 <- seqisize # for fr2
+ }
+ if ((dim(recd$li)[1])%%3 == 2)
+ {
+ seqisize1 <- seqisize + 1
+ seqisize2 <- seqisize + 1
+ }
+ if ((dim(recd$li)[1])%%3 == 0)
+ {
+ seqisize1 <- seqisize
+ seqisize2 <- seqisize
+ }
+ ##
+ ##direct strand##
+ ##
+ if (direct)
+ {
+ plot((sizewin/2) + (0:(seqisize1 - 1))*shift, recd$li[1:seqisize1, fac], type = "l", lty = 1,
+ col = col[1], xlim = xlim, ylim = c(min(recd$li[, fac]), max(recd$li[, fac])),
+ main = "Direct strand", xlab = "", ylab = "Factor scores", bty = 'l') # reading frame 1
+ lines((sizewin/2) + (0:(seqisize2 - 1))*shift + 1, recd$li[(seqisize1 + 1):(seqisize1 + seqisize2), fac],
+ lty = 2, col = col[2], ylab = "2") # reading frame 2
+ lines((sizewin/2) + (0:(seqisize - 1))*shift + 2, recd$li[(seqisize1 + seqisize2 + 1):(dim(recd$li)[1]), fac],
+ lty = 3, col = col[3], ylab = "3") # reading frame 3
+ legend("topleft", legend = c(paste("Sequence name:", seqname), paste("Sequence length:", seqsize, "bp")),
+ inset = c(-0.15, -0.2), bty = "n", xpd = TRUE)
+ vstopdindphase <- numeric()
+ if (length(vstopd) > 0)
+ { # test if vector is not empty because problem with modulo
+ vstopdindphase <- sapply(1:length(vstopd), function(x)
+ { # index vector of reading frame of vector vstopd
+ if (vstopd[x]%%3 == 1)
+ {
+ vstopdindphase <- c(vstopdindphase, 2.5)
+ }
+ else
+ {
+ if (vstopd[x]%%3 == 2)
+ {
+ vstopdindphase <- c(vstopdindphase, 1.5)
+ }
+ else
+ {
+ vstopdindphase <- c(vstopdindphase, 0.5)
+ }
+ }
+ })
+ }
+ vinitdindphase <- numeric()
+ if (length(vinitd) > 0)
+ { # test if vector is not empty because problem with modulo
+ vinitdindphase <- sapply(1:length(vinitd), function(x)
+ { # index vector of reading frame of vector vinitd
+ if (vinitd[x]%%3 == 1)
+ {
+ vinitdindphase <- c(vinitdindphase, 3)
+ }
+ else
+ {
+ if (vinitd[x]%%3 == 2)
+ {
+ vinitdindphase <- c(vinitdindphase, 2)
+ }
+ else
+ {
+ vinitdindphase <- c(vinitdindphase, 1)
+ }
+ }
+ })
+ }
+ par(mar = c(5, 4, 3, 2) + 0.1)
+ plot(vstopd, vstopdindphase, pch = 25, cex = 0.7, xlim = xlim, ylim = c(0.25, 3), axes = TRUE,
+ ann = TRUE, tcl = -0.5, bty = 'l', yaxt = 'n', xlab = "Start/Stop positions (bp)",
+ ylab = '', xpd = FALSE) # stop codons positions
+ points(vinitd, vinitdindphase, pch = 24, bg = "slategray", cex = 0.7, col = 'slategray') # start codons positions
+ abline(h = c(3.1, 2.4, 2.1, 1.4, 1.1, 0.4), col = c(col[1], col[1], col[2], col[2], col[3], col[3]),
+ lty = c(1, 1, 2, 2, 3, 3))
+ text(x = (xlim[1]-(xlim[2]-xlim[1])*0.75/6), pos = 4, y = c(2.75, 1.75, 0.75), labels = paste("Ph. ", c(0, 1, 2)), xpd = TRUE)
+ }
+ ##
+ ##reverse strand##
+ ##
+ if (!direct)
+ {
+ plot((sizewin/2) + (0:(seqisize1 - 1))*shift, recr$li[1:seqisize1, fac], type = "l", lty = 1,
+ col = col[1], xlim = xlim, ylim = c(min(recr$li[, fac]), max(recr$li[, fac])),
+ main = "Reverse strand", xlab = "", ylab = "Factor scores", bty = 'l') # reading frame 1
+ lines((sizewin/2) + (0:(seqisize2-1))*shift + 1, recr$li[(seqisize1 + 1):(seqisize1 + seqisize2), fac],
+ lty = 2, col = col[2], ylab="2") # reading frame 2
+ lines((sizewin/2) + (0:(seqisize - 1))*shift + 2, recr$li[(seqisize1 + seqisize2 + 1):(dim(recr$li)[1]), fac],
+ lty = 3, col = col[3], ylab = "3") # reading frame 3
+ legend("topleft", legend = c(paste("Sequence name:", seqname), paste("Sequence length:", seqsize, "bp")),
+ inset = c(-0.15, -0.2), bty = "n", xpd = TRUE)
+ vstoprindphase <- numeric()
+ if (length(vstopr) > 0)
+ { # test if vector is not empty because problem with modulo
+ vstoprindphase <- sapply(1:length(vstopr), function(x)
+ { # index vector of reading frame of vector vstopr
+ if (vstopr[x]%%3 == 1)
+ {
+ vstoprindphase <- c(vstoprindphase, 2.5)
+ }
+ else
+ {
+ if (vstopr[x]%%3 == 2)
+ {
+ vstoprindphase <- c(vstoprindphase, 1.5)
+ }
+ else
+ {
+ vstoprindphase <- c(vstoprindphase, 0.5)
+ }
+ }
+ })
+ }
+ vinitrindphase <- numeric()
+ if (length(vinitr) > 0)
+ { # test if vector is not empty because problem with modulo
+ vinitrindphase <- sapply(1:length(vinitr), function(x)
+ { # index vector of reading frame of vector vinitr
+ if (vinitr[x]%%3 == 1)
+ {
+ vinitrindphase <- c(vinitrindphase, 3)
+ }
+ else
+ {
+ if (vinitr[x]%%3 == 2)
+ {
+ vinitrindphase <- c(vinitrindphase, 2)
+ }
+ else
+ {
+ vinitrindphase <- c(vinitrindphase, 1)
+ }
+ }
+ })
+ }
+ par(mar = c(5, 4, 3, 2) + 0.1)
+ plot(vstopr, vstoprindphase, pch = 25, cex = 0.7, xlim = xlim, ylim = c(0.25, 3), axes = TRUE,
+ ann = TRUE, tcl = -0.5, bty = 'l', yaxt = 'n', xlab = "Start/Stop positions (bp)",
+ ylab = '', xpd = FALSE) # stop codons positions
+ points(vinitr, vinitrindphase, pch = 24, bg = "slategray", cex = 0.7, col = 'slategray') # start codons positions
+ abline(h = c(3.1, 2.4, 2.1, 1.4, 1.1, 0.4), col = c(col[1], col[1], col[2], col[2], col[3], col[3]),
+ lty = c(1, 1, 2, 2, 3, 3))
+ text(x = (xlim[1]-(xlim[2]-xlim[1])*0.75/6), pos = 4, y = c(2.75, 1.75, 0.75), labels = paste("Ph. ", c(0, 1, 2)), xpd = TRUE)
+ }
+}
\ No newline at end of file
diff --git a/R/extract.breakpoints.R b/R/extract.breakpoints.R
new file mode 100644
index 0000000..2cbd535
--- /dev/null
+++ b/R/extract.breakpoints.R
@@ -0,0 +1,99 @@
+extract.breakpoints <- function(rearr.ori,type=c("atfw","atrev","gcfw","gcrev"),nbreaks,gridsize=100,it.max=500){
+ # For each skew type, fit a piecewise linear regression (segmented package) to the cumulated skew and report the breakpoints; nbreaks holds the number of breakpoints for each element of type, in the same order.
+ if(length(type)==0){
+ stop("You must specify the type of skew: atfw, atrev, gcfw or gcrev. See ?extract.breakpoints for more details.")
+ }
+ if(sum(!type%in%c("atfw","atrev","gcfw","gcrev"))!=0){
+ stop("The type of skew must be one of the following: atfw, atrev, gcfw or gcrev. See ?extract.breakpoints for more details.")
+ }
+
+ result=list()
+
+ for(t in type){
+ # Select the genes of one strand and cumulate the requested skew:
+ if(t=="gcfw"){
+ print("Extracting breakpoints for GC-skew, forward-encoded genes")
+ x.breaks=rearr.ori$meancoord.rear[rearr.ori$strand.rear=="forward"]
+ y.breaks=cumsum(rearr.ori$gcskew.rear[rearr.ori$strand.rear=="forward"])
+ }
+ if(t=="gcrev"){
+ print("Extracting breakpoints for GC-skew, reverse-encoded genes")
+ x.breaks=rearr.ori$meancoord.rear[rearr.ori$strand.rear=="reverse"]
+ y.breaks=cumsum(rearr.ori$gcskew.rear[rearr.ori$strand.rear=="reverse"])
+ }
+ if(t=="atfw"){
+ print("Extracting breakpoints for AT-skew, forward-encoded genes")
+ x.breaks=rearr.ori$meancoord.rear[rearr.ori$strand.rear=="forward"]
+ y.breaks=cumsum(rearr.ori$atskew.rear[rearr.ori$strand.rear=="forward"])
+ }
+ if(t=="atrev"){
+ print("Extracting breakpoints for AT-skew, reverse-encoded genes")
+ x.breaks=rearr.ori$meancoord.rear[rearr.ori$strand.rear=="reverse"]
+ y.breaks=cumsum(rearr.ori$atskew.rear[rearr.ori$strand.rear=="reverse"])
+ }
+
+ assign("x.breaks",x.breaks,envir=.seqinrEnv)
+ assign("y.breaks",y.breaks,envir=.seqinrEnv)
+ # Random restarts: gridsize successful fits are collected with their residual sum of squares
+ rss=numeric(0)
+ starts=list()
+
+ i=0
+ while(i<gridsize){
+
+ initpsi=runif(nbreaks[which(type==t)],min=min(x.breaks),max=max(x.breaks))
+ # A failed fit is discarded and a new random start is drawn. The
+ # outcome is tested on the value returned by try(), so that an
+ # unrelated object called 'seg' elsewhere cannot be taken for a fit:
+
+ fit <- try(segmented::segmented(lm(y.breaks~x.breaks),x.breaks,psi=initpsi,it.max=it.max), silent = TRUE)
+
+
+ if(!inherits(fit, "try-error")){
+ starts[[length(starts)+1]]=initpsi
+ rss=c(rss, sum(fit$residuals^2))
+ i=i+1
+ }
+
+ gc()
+ }
+ # Refit from the starting values that gave the smallest residual sum of squares:
+ wmin=which.min(rss)
+
+
+ starts=starts[[wmin]]
+
+ seg <- segmented::segmented(lm(y.breaks~x.breaks),x.breaks,psi=starts,it.max=it.max)
+
+ breaks=round(seg$psi[,2])
+
+ breaks=breaks[order(breaks)]
+
+ sl=c(x.breaks[1],breaks,x.breaks[length(x.breaks)])-x.breaks[1]+1
+
+ # Slope of a separate linear fit on every segment between consecutive breakpoints:
+ slopes=numeric(length(sl)-1)
+
+ for(i in seq_len(length(sl)-1)){
+ u=(sl[i]:sl[i+1])+x.breaks[1]-1
+ slopes[i]=summary(lm(y.breaks[sl[i]:sl[i+1]]~u))$coefficients[2,1]
+ }
+
+ slopes.left=slopes[-length(slopes)]
+ slopes.right=slopes[-1]
+
+ result[[t]]=list()
+
+ result[[t]]$breaks=breaks
+ result[[t]]$slopes.left=slopes.left
+ result[[t]]$slopes.right=slopes.right
+
+ real.coord=rearr.ori$meancoord.real[breaks]
+ result[[t]]$real.coord=real.coord
+
+
+ }
+
+ return(result)
+
+}
diff --git a/R/extractseqs.R b/R/extractseqs.R
new file mode 100644
index 0000000..321a2f3
--- /dev/null
+++ b/R/extractseqs.R
@@ -0,0 +1,113 @@
+# Sequence retrieval
+# Simon, October 2006
+
+
+
+
+##########################################################################################
+# #
+# extractseqs.socket #
+# #
+##########################################################################################
+
+extractseqs <- function(listname, socket = autosocket(), format="fasta", operation="simple", feature="xx", bounds="xx", minbounds="xx", verbose = FALSE, nzlines=1000, zlib = FALSE){
+  #
+  # Send an "extractseqs" request for the list named listname through socket
+  # and return the lines answered by the server as a character vector.
+  #
+  # listname  : character, name of the list; its rank is looked up with glr().
+  # socket    : connection of class "sockconn" the request is written to.
+  # format    : "fasta", "flat" or "acnuc".
+  # operation : "simple", "translate", "fragment", "feature" or "region".
+  # feature, bounds, minbounds : optional request fields, "xx" meaning unset.
+  # verbose   : if TRUE progress messages are printed with cat().
+  # nzlines, zlib : with zlib = TRUE the answer is read by the compiled
+  #             routine "getzlibsock"; this is refused on windows.
+  #
+  if (zlib == TRUE && .Platform$OS.type == "windows") {
+    stop(paste("Zlib compressed sockets is not implemented for windows.\n"))
+  }
+  debug <- if (verbose) 1 else 0
+
+  if(verbose) cat("I'm checking the arguments...\n")
+
+  if( !inherits(socket, "sockconn") ) stop(paste("argument socket = ", socket, "is not a socket connection."))
+  if( !is.character(listname) ) stop(paste("argument listname = ", listname, "is not a character string."))
+  if(verbose) cat("... and everything is OK up to now.\n")
+
+  #
+  # Check that format is "acnuc", "fasta" or "flat":
+  #
+  if(verbose) cat("Format is ",format,"\n")
+  if ( !(format %in% c("fasta", "flat", "acnuc")) ) {
+    stop(paste("argument format = ", format, "is wrong. Format should be \"fasta\", \"flat\" or \"acnuc\"! "))
+  }
+
+  #
+  # Check that operation is "simple", "translate", "fragment", "feature" or "region":
+  #
+  if(verbose) cat("Operation is ",operation,"\n")
+  if ( !(operation %in% c("simple", "translate", "fragment", "feature", "region")) ) {
+    stop(paste("argument operation = ", operation, "is wrong. Operation should be \"simple\", \"translate\", \"fragment\", \"feature\" or \"region\"! "))
+  }
+
+  #
+  # Check optional fields:
+  #
+  if ((feature != "xx") && (verbose)) cat("feature = ", feature, ".\n")
+  if ((bounds != "xx") && (verbose)) cat("bounds = ", bounds, ".\n")
+  if ((minbounds != "xx") && (verbose)) cat("minbounds = ", minbounds, ".\n")
+
+  if ((operation == "feature") && (feature =="xx")) stop(paste("You should specify a feature!\n"))
+  if ((operation == "fragment") && (bounds =="xx")) stop(paste("You should specify bounds!\n"))
+  if ((operation == "region") && ((bounds =="xx") || (feature =="xx"))) stop(paste("You should specify bounds and region!\n"))
+
+  #
+  # Build request; the list is designated by its rank:
+  #
+  lrank <- glr(listname)
+  if(verbose) cat("The rank of the list ",listname, "is ",lrank,".\n")
+  if (is.na(lrank)) {
+    stop(paste("Problem in rank list!\n"))
+  }
+
+  request <- paste("extractseqs&lrank=", lrank, "&format=", format, "&operation=", operation, sep = "")
+  if (feature != "xx") request <- paste(request,"&feature=", feature, sep = "")
+  if (bounds != "xx") request <- paste(request,"&bounds=",bounds, sep = "")
+  if (minbounds != "xx") request <- paste(request,"&minbounds=",minbounds, sep = "")
+  request <- paste(request, if(zlib) "&zlib=T" else "&zlib=F", sep = "")
+  if(verbose) cat("request : ",request,"\n")
+
+  #
+  # Write request into the socket:
+  #
+  writeLines(request , socket, sep="\n")
+
+  #
+  # Read result from server:
+  #
+  if(zlib) {
+    lastres <- .Call("getzlibsock", socket, nzlines, debug,PACKAGE = "seqinr")
+    #
+    # Remove empty lines from lastres:
+    #
+    return( lastres[nchar(lastres) != 0] )
+  }
+  lastres <- readLines(socket)
+  #
+  # Remove flags from server:
+  #
+  lastres <- lastres[-1] # code=0
+  lastres <- lastres[-length(lastres)] # extractseqs END.
+  #
+  # Drop the count= lines with a logical mask: x[-grep(...)] would return an
+  # empty vector (losing every sequence line) when no line matches.
+  #
+  lastres <- lastres[!grepl("count=", lastres)] # count=
+  return(lastres)
+}
+
+#
+# Define a shortcut for extractseqs
+#
+exseq <- extractseqs
+
+
diff --git a/R/fastacc.R b/R/fastacc.R
new file mode 100644
index 0000000..6f906ff
--- /dev/null
+++ b/R/fastacc.R
@@ -0,0 +1,29 @@
+# fastacc: Fast Allele in Common Count
+#
+# Computes the number of common alleles between a target and a database
+#
+fastacc <- function(target, database)
+{
+  #
+  # Validate the two raw vectors; the database must hold a whole number
+  # of entries, each as long as the target.
+  #
+  if(!is.raw(target)) stop("raw vector expected for target")
+  entry.len <- length(target)
+  if(entry.len < 1) stop("empty target")
+  if(!is.raw(database)) stop("raw vector expected for database")
+  if(length(database) %% entry.len != 0) stop("database length must be a multiple of target length")
+  #
+  # Total number of entries in the database
+  #
+  n.entries <- as.integer(length(database)/entry.len)
+  #
+  # Lookup table: number of bits set in each of the 256 possible octets.
+  # rawToBits() yields 8 bits per octet, so each matrix column is one octet.
+  #
+  all.bits <- matrix(as.integer(rawToBits(as.raw(0:255))), nrow = 8)
+  bits_in_char <- as.integer(colSums(all.bits))
+
+  .Call("fastacc", bits_in_char, target, database, entry.len, n.entries, PACKAGE = "seqinr")
+}
diff --git a/R/gb2fasta.R b/R/gb2fasta.R
new file mode 100644
index 0000000..2d010ff
--- /dev/null
+++ b/R/gb2fasta.R
@@ -0,0 +1,39 @@
+########################################################################
+#
+# gb2fasta
+#
+# single entry genbank to fasta conversion
+#
+########################################################################
+gb2fasta <- function(source.file, destination.file)
+{
+  #
+  # Convert a single-entry genbank file into a fasta file.
+  #
+  # source.file      : what readLines() should read the genbank entry from.
+  # destination.file : description of the file the fasta output is written to.
+  #
+  # The fasta header is ">name size bp", taken from the 2nd and 3rd non-empty
+  # space-separated fields of the first input line. Stops if the input holds
+  # more than one entry or no ORIGIN ... // block. Returns NULL invisibly.
+  #
+  input <- readLines(source.file)
+  locus <- unlist(strsplit(input[1], split=" "))
+  locus <- locus[nchar(locus) > 0]
+  seqname <- locus[2]
+  seqsize <- as.integer(locus[3])
+  outheader <- sprintf(">%s %d bp", seqname, seqsize)
+  #
+  # Look for sequence position. Everything is validated before the
+  # destination is opened so that a failure leaves no truncated output file.
+  #
+  debut <- which(substring(input,1,6) == "ORIGIN") + 1
+  fin <- which(substring(input,1,2) == "//") - 1
+  if( length( debut ) > 1 || length( fin ) > 1 )
+    stop("Multiple entries not yet implemented !")
+  if( length( debut ) == 0 || length( fin ) == 0 )
+    stop("No ORIGIN ... // sequence block found in source file")
+  #
+  # debut > fin means ORIGIN is directly followed by //: debut:fin would then
+  # count backwards and pick up non-sequence lines.
+  #
+  seqlines <- if( debut <= fin ) input[debut:fin] else character(0)
+  #
+  # Keep the six blocks of ten characters of each sequence line (substr is
+  # vectorised, one output string per input line):
+  #
+  seqlines <- paste(substr(seqlines,11,20), substr(seqlines,22,31), substr(seqlines,33,42),
+                    substr(seqlines,44,53), substr(seqlines,55,64), substr(seqlines,66,75), sep="")
+
+  confile <- file(destination.file, open="w")
+  on.exit(close(confile)) # released even if a write fails
+  writeLines(outheader, confile)
+  writeLines(seqlines, confile)
+  invisible(NULL)
+}
+
+
diff --git a/R/gbk2g2.R b/R/gbk2g2.R
new file mode 100644
index 0000000..35e1058
--- /dev/null
+++ b/R/gbk2g2.R
@@ -0,0 +1,86 @@
+#########################################################################
+#
+# gbk2g2
+#
+# Conversion of a genbank format into a run-glimmer2 like format
+#
+#
+#########################################################################
+
+gbk2g2 <- function(
+  gbkfile = "ftp://pbil.univ-lyon1.fr/pub/seqinr/data/ct.gbk",
+  g2.coord = "g2.coord")
+{
+  #
+  # gbkfile  : what readLines() should read the genbank file from.
+  # g2.coord : description of the output file; one line "index start end"
+  #            is written per CDS that is neither partial nor a join.
+  # Returns invisibly the location string of every CDS line.
+  #
+  input <- readLines(gbkfile)
+
+  outfile <- file( description = g2.coord, open ="w")
+  on.exit(close(outfile)) # released on error too
+  #
+  # Keep lines with CDS flag. In the genbank feature table the key starts in
+  # column 6, hence five leading blanks in the 8-character prefix.
+  #
+  input <- input[ substring(input,1,8) == "     CDS"]
+
+  #
+  # Extract boundaries strings: second blank-separated token of more than
+  # one character, the first one being the CDS key itself.
+  #
+  get.boundaries <- function( line )
+  {
+    tokens <- unlist(strsplit(line, split = " "))
+    tokens[nchar(tokens) > 1][2]
+  }
+  input <- vapply(input, get.boundaries, character(1), USE.NAMES = FALSE)
+
+  #
+  # Look for 5' partial genes:
+  #
+  idx5p <- grep(">", input)
+  if( length( idx5p ) != 0 )
+    warning("5' partial genes encountered (no output): ", paste(idx5p, collapse = ", "))
+
+  #
+  # Look for 3' partial genes:
+  #
+  idx3p <- grep("<", input)
+  if( length( idx3p ) != 0 )
+    warning("3' partial genes encountered (no output): ", paste(idx3p, collapse = ", "))
+
+  #
+  # Look for join in features:
+  #
+  idxjoin <- grep("join", input)
+  if( length( idxjoin ) != 0 )
+    warning("join encountered (no output): ", paste(idxjoin, collapse = ", "))
+
+  #
+  # Partials and joins produce no output line:
+  #
+  censored <- c(idx5p, idx3p, idxjoin)
+
+  #
+  # Extract boundaries. seq_along() copes with a file without any CDS,
+  # where seq(from = 1, to = 0, by = 1) is an error.
+  #
+  for( i in setdiff(seq_along(input), censored) )
+  {
+    tmp <- unlist( strsplit(input[i], split="\\.\\.") )
+    if( length( grep("complement", input[i]) ) == 1 )
+    {
+      # "complement(" is 11 characters long, the coordinate starts at the 12th
+      end <- as.integer( substring(tmp[1], first = 12 ) ) + as.integer(3)
+      start <- as.integer( substring(tmp[2], first = 1, last = nchar(tmp[2]) - 1 ))
+    }
+    else #direct strand
+    {
+      start <- as.integer(tmp[1])
+      end <- as.integer(tmp[2]) + as.integer(-3)
+    }
+    writeLines( sprintf(fmt = "%d %d %d", as.integer(i), start, end), con = outfile )
+  }
+  invisible(input)
+}
+
diff --git a/R/gbk2g2.euk.R b/R/gbk2g2.euk.R
new file mode 100644
index 0000000..cbfde7d
--- /dev/null
+++ b/R/gbk2g2.euk.R
@@ -0,0 +1,153 @@
+gbk2g2.euk <- function(
+  gbkfile = system.file("sequences/ame1.gbk", package = "seqinr"), g2.coord = "g2.coord")
+{
+  #
+  # gbkfile  : what readLines() should read the genbank file from.
+  # g2.coord : description of the output file; one line "gene begin end" is
+  #            written per exon of every CDS, duplicated lines being skipped.
+  # Returns NULL invisibly.
+  #
+  input <- readLines(gbkfile)
+  outfile <- file( description = g2.coord, open ="w+")
+  on.exit(close(outfile)) # released on error too
+
+  #
+  # Keep lines with annotation flag. In the genbank feature table the key
+  # starts in column 6, hence five leading blanks in the 8-character prefix.
+  #
+  cds <- which(substring(input,1,8) == "     CDS")
+  features <- c(which(substr(input,6,6) != " "), length(input))
+  genes <- which(substr(input,22,26) == "/gene")
+
+  #
+  # Name of each CDS: the first /gene qualifier found after the CDS line and
+  # before the next feature. NA positions (no /gene or no feature left after
+  # the CDS) are treated as "no id" instead of crashing the if() test.
+  #
+  genes.cds <- character(length(cds))
+  for(i in seq_along(cds)){
+    this.gene <- genes[which(genes > cds[i])[1]]
+    nextfeat <- features[which(features > cds[i])[1]]
+    if(!is.na(this.gene) && !is.na(nextfeat) && this.gene < nextfeat){
+      genes.cds[i] <- unlist(strsplit(input[this.gene], split="\""))[2]
+    }
+    else{
+      print(paste("no id for cds #",i))
+    }
+  }
+
+  #
+  # k-th part (1 or 2) of each "a..b" range
+  #
+  range.part <- function( ranges, k )
+    vapply(strsplit(ranges, split="\\.\\."), function(p) p[k], character(1))
+
+  #
+  # Extract boundaries strings: list(begins, ends) for the CDS found at
+  # line index.line; begin and end are swapped on the complement strand.
+  #
+  get.boundaries <- function( index.line )
+  {
+    line <- input[index.line]
+    if(length(grep("join", line)) > 0){ ## there are introns !!
+      if(length(grep("\\)", line)) == 0){ ## the exons are written on more than one line
+        next.end.par <- grep("\\)", input)
+        next.end.par <- next.end.par[which(next.end.par > index.line)[1]] ## we take the first ending parenthesis
+        following <- input[(index.line+1):next.end.par]
+        exons <- paste(line, paste(substr(following, 22, nchar(following)), sep="", collapse=""), collapse="", sep="")
+      }
+      else{
+        exons <- line
+      }
+      reverse <- length(grep("complement", exons)) > 0
+      ## complement(join(...)) puts the ranges after the second "("
+      exons <- unlist(strsplit(exons, split="\\("))[if(reverse) 3 else 2]
+      exons <- unlist(strsplit(exons, split="\\)"))[1]
+      exons <- unlist(strsplit(exons, split=","))
+    }
+    else{
+      reverse <- length(grep("complement", line)) > 0
+      exons <- unlist(strsplit(line, split=" "))
+      exons <- exons[exons != ""][2]
+      if(reverse){
+        exons <- unlist(strsplit(exons, split="\\("))[2]
+        exons <- unlist(strsplit(exons, split="\\)"))[1]
+      }
+    }
+    first <- range.part(exons, 1)
+    second <- range.part(exons, 2)
+    if(reverse) list(second, first) else list(first, second)
+  }
+
+  #
+  # Write one line per exon, each distinct line only once:
+  #
+  already <- character(0)
+  for(i in seq_along(cds)){
+    boundaries <- get.boundaries(cds[i])
+    for(j in seq_along(boundaries[[1]])){
+      phrase <- paste(genes.cds[i], boundaries[[1]][j], boundaries[[2]][j])
+      if(!phrase %in% already){
+        writeLines(phrase, outfile)
+        already <- c(already, phrase)
+      }
+    }
+  }
+  invisible(NULL)
+}
+
diff --git a/R/get.ncbi.R b/R/get.ncbi.R
new file mode 100755
index 0000000..bde97ec
--- /dev/null
+++ b/R/get.ncbi.R
@@ -0,0 +1,388 @@
+
+
+########################################################################
+# get.ncbi
+#
+# Try to connect to ncbi to get a list of complete bacterial genomes.
+# Returns an n by 5 dataframe with
+# species names
+# accession number
+# size in bp
+# type (chromosome or plasmid)
+# Last update time
+#
+#########################################################################
+get.ncbi <- function(repository = "ftp://ftp.ncbi.nih.gov/genomes/Bacteria/" )
+{
+  #
+  # repository : url of the ftp folder holding one sub-folder per species.
+  # Returns a data frame with columns species, accession, size.bp, type and
+  # lastupdate, one row per *.gbk file found in the species folders.
+  # Relies on a command-line ftp client; Darwin, SunOS and Linux only.
+  #
+
+  #
+  # First of all, check that this computer is not off the net:
+  #
+  if( ! capabilities("http/ftp") )
+    stop("capabilities(\"http/ftp\") is not TRUE")
+
+  #
+  # Proxy problem: the ftp connection apparently does not work when there is
+  # a proxy, so no_proxy is blanked as before. This is a rather crude and
+  # unsatisfactory solution; at least the previous value is put back on exit.
+  #
+  if( Sys.getenv("ftp_proxy") != "" ) # there is a proxy
+  {
+    warning("I'am trying to neutralize proxies")
+    no.proxy.bck <- Sys.getenv("no_proxy", unset = NA)
+    on.exit(
+      if( is.na(no.proxy.bck) ) Sys.unsetenv("no_proxy") else Sys.setenv("no_proxy" = no.proxy.bck),
+      add = TRUE)
+    Sys.setenv("no_proxy" = "")
+  }
+
+  #
+  # R build-in ftp does not allow to list a folder directly, so we rely on a
+  # system call that will run only under Unix systems. Moreover, not all ftp
+  # client supports this syntax.
+  #
+  sysinfo <- Sys.info()[1]
+  if( is.null(sysinfo) || !(sysinfo %in% c("Darwin", "SunOS", "Linux")) )
+    stop("Unimplemented platform")
+  hostname <- unlist(strsplit(repository, split="/"))[3]
+
+  #
+  # Run a shell command and return its output lines. The pipe is closed
+  # explicitly: no need for closeAllConnections(), which would also close
+  # connections that do not belong to this function.
+  #
+  run.cmd <- function( cmd )
+  {
+    con <- pipe(cmd)
+    on.exit(close(con))
+    readLines(con)
+  }
+
+  #
+  # Raw listing of the repository (folder = "") or of one of its sub-folders
+  #
+  ftp.dir <- function( folder = "" )
+  {
+    if( sysinfo == "Darwin" )
+    {
+      where <- if( folder == "" ) repository else paste(repository, folder, "/", sep="", collapse="")
+      # folder names come from the remote listing: quote them for the shell
+      return( run.cmd(sprintf("echo \"ls\" | ftp %s", shQuote(where))) )
+    }
+    #
+    # SunOS and Linux: build command file for ftp connection
+    #
+    tmpname <- tempfile(pattern="getncbi")
+    on.exit(unlink(tmpname))
+    remotedir <- if( folder == "" ) "genomes/Bacteria" else sprintf("genomes/Bacteria/%s", folder)
+    writeLines(c("user anonymous seqteam at biomserv.univ-lyon1.fr",
+                 paste("cd", remotedir), "dir", "bye"), tmpname)
+    run.cmd(sprintf("ftp -v -n %s < %s", hostname, shQuote(tmpname)))
+  }
+
+  #
+  # Keep only lines corresponding to folders, each line looking like:
+  #
+  # "dr-xr-xr-x 2 ftp anonymous 4096 Jul 15 15:41 Aeropyrum_pernix"
+  #
+  # and keep the last blank-separated field, which is the folder name.
+  #
+  brut <- ftp.dir()
+  brut <- brut[grep("dr-xr-xr-x", brut)]
+  brut <- vapply(strsplit(brut, split=" "), function(v) v[length(v)], character(1))
+  # we remove the CLUSTERS folder, since it doesn't contain any annotated bacterial genomes
+  brut <- brut[!grepl("CLUSTERS", brut)]
+
+  #
+  # Human curated types, overriding what is guessed from the entry header:
+  #
+  curated.chromosome <- c("NC_002528", "NC_003454", "NC_005042", "NC_005072",
+    "NC_004631", "NC_004344", "NC_003902", "NC_005957", "NC_007984", "NC_002937",
+    "NC_008054", "NC_002942", "NC_005823", "NC_005824", "NC_000916", "NC_007633",
+    "NC_006905", "NC_006511", "NC_003198", "NC_007350", "NC_006833", "NC_006810",
+    "NC_008529", "NC_008531", "NC_008346", "NC_008800")
+  curated.plasmid <- c("NC_001732", "NC_001733", "NC_005863", "NC_006855",
+    "NC_006856", "NC_007351", "NC_007352", "NC_003425")
+
+  #
+  # Set vector types for results:
+  #
+  species <- character(0)
+  accession <- character(0)
+  size.bp <- integer(0)
+  type <- character(0)
+  lastupdate <- character(0)
+
+  #
+  # Main loop on folders to see what's inside
+  #
+  for( folder in brut )
+  {
+    whatsin <- ftp.dir(folder)
+    whatsin <- whatsin[ grep("\\.gbk", whatsin)] # Keep only files with ".gbk" extension
+    #
+    # Remove backup files with % extension:
+    #
+    whatsin <- whatsin[substr(whatsin, nchar(whatsin), nchar(whatsin)) != "%"]
+
+    # seq_along() so that a folder without any .gbk file is simply skipped
+    for( i in seq_along(whatsin) ) # Loop on sequences data
+    {
+      #
+      # The accession number is the file name without its extension:
+      #
+      accname <- unlist(strsplit(whatsin[i], split=" "))
+      accname <- accname[length(accname)]
+      accname <- unlist(strsplit(accname, split="\\."))[1]
+      #
+      # Size (3rd field) and last update time (last field) are read from the
+      # first line of the entry. readLines() on a url opens and closes its
+      # own connection.
+      #
+      entry <- paste(repository, folder, "/", accname, ".gbk", sep="", collapse="")
+      header <- readLines(entry, n=2)
+      fields <- unlist(strsplit(header[1], split=" "))
+      fields <- fields[nchar(fields) > 0]
+      bp <- fields[3]
+      last <- fields[length(fields)]
+      #
+      # Try to get the type (chromosome versus plasmid) of this entry from
+      # its second line:
+      #
+      descr <- tolower(header[2])
+      if( grepl("plasmid", descr) ) {
+        def <- "plasmid"
+      } else if( grepl("chromosome", descr) || grepl("genome", descr) ) {
+        def <- "chromosome"
+      } else {
+        def <- NA
+      }
+      if( accname %in% curated.chromosome ) def <- "chromosome"
+      if( accname %in% curated.plasmid ) def <- "plasmid"
+      #
+      # Concatenate results:
+      #
+      species <- c(species, folder)
+      accession <- c( accession, accname)
+      size.bp <- c(size.bp, as.integer(bp))
+      lastupdate <- c(lastupdate, last)
+      type <- c(type, def)
+      cat("\n",folder,accname,bp,def,last,"\n")
+    }
+  }
+
+  return(data.frame(I(species), I(accession), size.bp, type, I(lastupdate)))
+}
+
+
+
+
+########################################################################
+# ncbi.fna.url
+#
+# Try to build urls to access complete genome sequences data
+# in fasta format from get.ncbi() output
+#
+########################################################################
+
+ncbi.fna.url <- function( get.ncbi.out = get.ncbi() )
+{
+  #
+  # One url per row: first column is the folder, second column gives the
+  # file name once everything after its first dot is replaced by ".fna".
+  #
+  apply( get.ncbi.out, 1, function( row )
+  {
+    prefix <- strsplit(row[2], "\\.")[[1]][1] # keep what is before the first dot
+    paste("ftp://ftp.ncbi.nih.gov/genomes/Bacteria/", row[1], "/",
+          prefix, ".fna", sep="", collapse="")
+  })
+}
+
+########################################################################
+#
+# Try to build urls to access complete genome sequences data
+# in genbank format from get.ncbi() output
+#
+########################################################################
+
+ncbi.gbk.url <- function( get.ncbi.out = get.ncbi() )
+{
+  #
+  # get.ncbi.out : data frame whose first column is the species folder and
+  #                whose second column is the accession number.
+  # Returns one url per row, pointing at the *.gbk file of the entry.
+  #
+  build.url <- function( x )
+  {
+    #
+    # The accession holds no file extension (get.ncbi() strips it), so the
+    # ".gbk" suffix has to be added here, as done for *.fna and *.ptt urls.
+    # A suffix already present is dropped first so it is never doubled.
+    #
+    ficname <- unlist(strsplit(x[2],"\\.")) # split prefix and suffix
+    ficname <- ficname[1] # keep prefix
+    ficname <- paste( ficname, ".gbk", collapse="", sep="")
+    urlname <- paste("ftp://ftp.ncbi.nih.gov/genomes/Bacteria/", x[1],
+                     "/",ficname, collapse="", sep="")
+    return(urlname)
+  }
+  apply( get.ncbi.out, 1, build.url )
+}
+########################################################################
+# Try to build urls to access complete genome sequences data
+# file *.ptt from get.ncbi() output
+#
+########################################################################
+
+ncbi.ptt.url <- function( get.ncbi.out = get.ncbi() )
+{
+  #
+  # One url per row: first column is the folder, second column gives the
+  # file name once everything after its first dot is replaced by ".ptt".
+  #
+  apply( get.ncbi.out, 1, function( row )
+  {
+    prefix <- strsplit(row[2], "\\.")[[1]][1] # keep what is before the first dot
+    paste("ftp://ftp.ncbi.nih.gov/genomes/Bacteria/", row[1], "/",
+          prefix, ".ptt", sep="", collapse="")
+  })
+}
+
+########################################################################
+#
+# Try to get the number of cds and genome size
+#
+########################################################################
+
+ncbi.stats <- function( get.ncbi.out = get.ncbi() )
+{
+  #
+  # Genome size: third non-empty blank-separated field of the first line
+  # of the genbank file.
+  #
+  size.from.gbk <- function( url )
+  {
+    fields <- unlist(strsplit(readLines( url, n = 1 ), split=" "))
+    as.integer(fields[nchar(fields)>0][3])
+  }
+  #
+  # Number of proteins: first blank-separated field of the third line of
+  # the ptt file; the field is printed as it is read.
+  #
+  nprot.from.ptt <- function( url )
+  {
+    first.field <- unlist(strsplit(readLines( url, n = 3 )[3], split=" "))[1]
+    print(first.field)
+    as.integer(first.field)
+  }
+
+  gsize <- sapply( ncbi.gbk.url( get.ncbi.out ), size.from.gbk )
+  nprot <- sapply( ncbi.ptt.url( get.ncbi.out ), nprot.from.ptt )
+  return( data.frame( cbind(get.ncbi.out[,1], gsize, nprot) ) )
+}
diff --git a/R/getAnnot.R b/R/getAnnot.R
new file mode 100644
index 0000000..74b5843
--- /dev/null
+++ b/R/getAnnot.R
@@ -0,0 +1,44 @@
+#
+# To get annotations corresponding to sequences
+#
+getAnnot <- function(object, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getAnnot")
+}
+
+getAnnot.default <- function(object, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getAnnot method for objects of class:", class(object)))
+}
+
+getAnnot.list <- function(object, ...){
+  # Element by element, in order; the result is an unnamed list
+  lapply(seq_along(object), function(k) getAnnot(object[[k]], ...))
+}
+
+getAnnot.SeqFastadna <- function(object, ...){
+  # Annotations are kept in the "Annot" attribute
+  attr(object, "Annot")
+}
+getAnnot.SeqFastaAA <- getAnnot.SeqFastadna
+
+getAnnot.SeqAcnucWeb <- function(object, ..., nbl = 100, socket = autosocket()){
+  # At least one line has to be asked for
+  if(nbl <= 0){
+    warning("Negative or zero value for argument nbl, forced to 1.")
+    nbl <- 1
+  }
+  # Send the request, then read nbl lines of answer
+  writeLines(paste0("read_annots&name=", object, "&nl=", nbl), socket, sep="\n")
+  annots <- readLines(socket , n = nbl)
+  # The first line starts with the "nl=xx&" answer from server: keep only
+  # what follows the first "&"
+  annots[1] <- unlist(strsplit(annots[1], "&"))[2]
+  annots
+}
+
+getAnnot.qaw <- function(object, ...){
+  # Work on the req component of the object
+  getAnnot(object$req, ...)
+}
+
+getAnnot.logical <- function (object, ...){
+  object # so that NA is returned for virtual lists
+}
+
diff --git a/R/getFrag.R b/R/getFrag.R
new file mode 100644
index 0000000..b71494c
--- /dev/null
+++ b/R/getFrag.R
@@ -0,0 +1,47 @@
+#
+# To extract a subsequence from a sequence
+#
+
+getFrag <- function(object, begin, end, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getFrag")
+}
+
+getFrag.default <- function(object, begin, end, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getFrag method for objects of class:", class(object)))
+}
+
+getFrag.list <- function(object, begin, end, ...){
+  # Same begin and end for every element; the result is an unnamed list
+  lapply(seq_along(object), function(k) getFrag(object[[k]], begin = begin, end = end, ...))
+}
+
+getFrag.character <- function(object, begin, end, ...){
+  # A single string is first split into a vector of characters
+  chars <- if(length(object) == 1) s2c(object) else object
+  n <- length(chars)
+  if(begin > n || end > n || begin > end) stop("borns are not correct")
+  chars[begin:end]
+}
+
+getFrag.SeqFastadna <- function(object, begin, end, ..., name = getName(object)){
+  # The fragment may not run past the end of the sequence
+  if(end > getLength(object)) stop("invalid end")
+  chars <- getSequence(object, as.string = FALSE)
+  as.SeqFrag(chars[begin:end], begin = begin, end = end, name = name)
+}
+getFrag.SeqFastaAA <- getFrag.SeqFastadna
+
+getFrag.SeqFrag <- function(object, begin, end, ..., name = getName(object)){
+  if((end<begin) || (end>getLength(object))) stop("invalid end")
+  # begin and end are relative to this fragment: shift them by the "begin"
+  # attribute of the fragment before building the new one
+  origin <- attr(object, "begin")
+  as.SeqFrag(object = object[begin:end],
+             begin = origin + begin - 1,
+             end = origin + end - 1,
+             name = name)
+}
+
+getFrag.SeqAcnucWeb <- function(object, begin, end, ..., socket = autosocket(), name = getName(object)){
+  # Neither bound may exceed the length of the sequence
+  seqlen <- getLength(object)
+  if(end > seqlen) stop("end parameter is too large")
+  if(begin > seqlen) stop("begin parameter is too large")
+  # The fragment is fetched by sequence name with gfrag()
+  fragment <- gfrag(what = name, start = begin, length = end - begin + 1, idby = "name", socket = socket)
+  as.SeqFrag(fragment, begin = begin, end = end, name = name)
+}
+
+getFrag.qaw <- function(object, begin, end, ...){
+  # Work on the req component of the object
+  getFrag(object$req, begin, end, ...)
+}
+
+getFrag.logical <- function (object, begin, end, ...){
+  object # so that NA is returned for virtual lists
+}
diff --git a/R/getKeyword.R b/R/getKeyword.R
new file mode 100644
index 0000000..f33e366
--- /dev/null
+++ b/R/getKeyword.R
@@ -0,0 +1,51 @@
+#
+# To get Keywords associated with a sequence.
+#
+
+getKeyword <- function(object, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getKeyword")
+}
+
+getKeyword.default <- function(object, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getKeyword method for objects of class:", class(object)))
+}
+
+getKeyword.list <- function(object, ...){
+  # Element by element, in order; the result is an unnamed list
+  lapply(seq_along(object), function(k) getKeyword(object[[k]], ...))
+}
+
+getKeyword.SeqAcnucWeb <- function(object, ..., socket = autosocket()){
+  # Send one request line and read back the one-line answer
+  ask <- function(request){
+    writeLines(request, socket, sep = "\n")
+    readLines(socket, n = 1)
+  }
+  # Chain of requests: isenum on the name, readsub on the number answered,
+  # then readshrt on the 7th field of the readsub answer
+  seqnum <- parser.socket(ask(paste0("isenum&name=", object)))[1]
+  subrec <- parser.socket(ask(paste0("readsub&num=", seqnum)))
+  shrt <- ask(paste0("readshrt&num=", subrec[7]))
+
+  # The 3rd "&"-separated field is a comma-separated list of (val, next)
+  # pairs: keep the first item of each pair
+  pairs <- unlist(strsplit(unlist(strsplit(shrt, "&"))[3], ","))
+  kwnums <- pairs[c(TRUE, FALSE)]
+
+  # One readkey request per item; the first and last characters of the
+  # 2nd field of the answer are dropped
+  keywords <- lapply(kwnums, function(num){
+    field <- parser.socket(ask(paste0("readkey&num=", num)))[2]
+    substring(field[1], 2, nchar(field[1]) - 1)
+  })
+  unlist(keywords)
+}
+
+getKeyword.qaw <- function(object, ...){
+  # Work on the req component of the object
+  getKeyword(object$req, ...)
+}
+
+getKeyword.logical <- function (object, ...){
+  object # so that NA is returned for virtual lists
+}
diff --git a/R/getLength.R b/R/getLength.R
new file mode 100644
index 0000000..1531019
--- /dev/null
+++ b/R/getLength.R
@@ -0,0 +1,34 @@
+#
+# To get the length of sequences
+#
+
+getLength <- function(object, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getLength")
+}
+
+getLength.default <- function(object, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getLength method for objects of class:", class(object)))
+}
+
+getLength.list <- function(object, ...){
+  # Element by element, in order, simplified by sapply()
+  sapply(seq_along(object), function(k) getLength(object[[k]], ...))
+}
+
+getLength.character <- function(object, ...){
+  # Several strings: total number of characters
+  if(length(object) != 1) return(sum(nchar(object)))
+  # A single string: number of characters once split by s2c()
+  length(s2c(object))
+}
+
+getLength.SeqFastadna <- function(object, ...){
+  # Number of elements of the sequence taken as a vector of characters
+  length(getSequence(object, as.string = FALSE))
+}
+getLength.SeqFastaAA <- getLength.SeqFastadna
+
+getLength.SeqAcnucWeb <- function(object, ...){
+  # The length is kept in the "length" attribute
+  attr(object, "length")
+}
+
+getLength.qaw <- function(object, ...){
+  # Work on the req component of the object
+  getLength(object$req, ...)
+}
+
+getLength.logical <- function (object, ...){
+  object # so that NA is returned for virtual lists
+}
+
+getLength.SeqFrag <- function(object, ...){
+  # A fragment spans positions begin to end, both included, so it holds
+  # end - begin + 1 elements (end - (begin + 1) is two short).
+  attr(object, "end") - attr(object, "begin") + 1
+}
diff --git a/R/getLocation.R b/R/getLocation.R
new file mode 100644
index 0000000..633d463
--- /dev/null
+++ b/R/getLocation.R
@@ -0,0 +1,56 @@
+#
+# To get the location of subsequences from an ACNUC server
+#
+
+getLocation <- function(object, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getLocation")
+}
+
+getLocation.list <- function(object, ...){
+  # Element by element, in order; the result is an unnamed list
+  lapply(seq_along(object), function(k) getLocation(object[[k]], ...))
+}
+
+getLocation.default <- function(object, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getLocation method for objects of class:", class(object)))
+}
+
+getLocation.SeqAcnucWeb <- function(object, ..., socket = autosocket()){
+  getLocationSocket <- function( socket, name){
+    # Send one request line and read back the one-line answer
+    ask <- function(request){
+      writeLines(request, socket, sep="\n")
+      readLines(socket, n=1)
+    }
+    seqnum <- parser.socket(ask(paste0("isenum&name=", name)))[1]
+    subrec <- parser.socket(ask(paste0("readsub&num=", seqnum)))
+
+    # Test whether this is a subsequence
+    if(as.numeric(subrec[5]) != 0){
+      warning("It's a parent sequence\n")
+      return( NA )
+    }
+
+    # Walk the chain of readext records, starting from the 6th field of the
+    # readsub answer: fields 3 and 4 of each record are kept, field 5 is
+    # the number of the next record, 0 ending the chain.
+    pieces <- list()
+    nxt <- subrec[6]
+    repeat{
+      ext <- parser.socket(ask(paste0("readext&num=", nxt)))
+      pieces[[length(pieces) + 1]] <- as.numeric(c(ext[3], ext[4]))
+      nxt <- ext[5]
+      if(as.numeric(nxt) == 0) break
+    }
+    return(pieces)
+  }
+
+  unlist(getLocationSocket(socket, name = object))
+}
+
+getLocation.qaw <- function(object, ...){
+  # Work on the req component of the object
+  getLocation(object$req, ...)
+}
+
+getLocation.logical <- function (object, ...){
+  object # so that NA is returned for virtual lists
+}
diff --git a/R/getName.R b/R/getName.R
new file mode 100644
index 0000000..cefa8e9
--- /dev/null
+++ b/R/getName.R
@@ -0,0 +1,22 @@
+#
+# To get sequence names
+#
+
+getName <- function(object, ...){
+  # S3 generic: dispatch on the class of object
+  UseMethod("getName")
+}
+
+getName.default <- function(object, ...){
+  # Fallback when no method exists for this class
+  stop(paste("no getName method for objects of class:", class(object)))
+}
+
+getName.list <- function(object, ...){
+  # Element by element, in order, simplified by sapply()
+  sapply(seq_along(object), function(k) getName(object[[k]], ...))
+}
+
+getName.SeqFastadna <- function(object, ...){
+  # The name is kept in the "name" attribute
+  attr(object,"name")
+}
+getName.SeqFastaAA <- getName.SeqFastadna
+
+getName.SeqAcnucWeb <- function(object, ...){
+  # The object itself, as a character vector
+  as.character(object)
+}
+
+getName.qaw <- function(object, ...){
+  # Work on the req component of the object
+  getName(object$req, ...)
+}
+
+getName.logical <- function (object, ...){
+  object # so that NA is returned for virtual lists
+}
+
+getName.SeqFrag <- function(object, ...){
+  # Name kept in the "seqMother" attribute of the fragment
+  attr(object, "seqMother")
+}
diff --git a/R/getSequence.R b/R/getSequence.R
new file mode 100644
index 0000000..4f0bdea
--- /dev/null
+++ b/R/getSequence.R
@@ -0,0 +1,73 @@
+#
+# getSequence: S3 generic to get sequence data; dispatches on the class of object
+#
+
+getSequence <- function(object, as.string = FALSE, ...) UseMethod("getSequence")
+
+getSequence.default <- function(object, as.string = FALSE, ...) # fallback: collapse the class vector into one readable message
+  stop(paste("no getSequence method for objects of class:", paste(class(object), collapse = ", ")))
+
+getSequence.list <- function(object, as.string = FALSE, ...) # one result per list element, as.string forwarded
+  lapply(seq_along(object), function(k) getSequence(object[[k]], as.string = as.string, ...))
+
+getSequence.character <- function(object, as.string = FALSE, ...){
+  # A "single string" is a vector of length one holding more than one character.
+  oneString <- length(object) == 1 && nchar(object) > 1
+  # String output is wrapped in a list; vector output has one character per element.
+  if(oneString && as.string) return(as.list(object))
+  if(oneString) return(s2c(object))
+  if(as.string) as.list(c2s(object)) else object
+}
+
+getSequence.SeqFastadna <- function(object, as.string = FALSE, ...){
+  # Strip every attribute first: only the bare characters are wanted here.
+  getSequence.character(`attributes<-`(object, NULL), as.string, ...)
+}
+
+getSequence.SeqFrag <- getSequence.SeqFastaAA <- getSequence.SeqFastadna
+
+getSequence.SeqAcnucWeb <- function(object, as.string = FALSE, ..., socket = autosocket()){
+#
+# Should call gfrag directly... need to implement as.string for this
+#
+  getSequenceSocket <- function(socket, name, start, length, as.string = FALSE){
+    #
+    # Send a gfrag request and read the one-line reply:
+    #
+    startTxt <- formatC(start, format = "d")
+    lengthTxt <- formatC(length, format = "d")
+    request <- paste("gfrag&name=", name, "&start=", startTxt, "&length=", lengthTxt, sep = "")
+    writeLines(request, socket, sep = "\n")
+    reply <- readLines(socket, n = 1)
+    #
+    # Guard: nothing came back from the server.
+    #
+    if(length(reply) == 0){
+      warning(paste("Empty answer from server with sequence name:", name))
+      return(NA)
+    }
+    #
+    # Guard: the server answered with an error code.
+    #
+    if(substr(x = reply, start = 1, stop = 5) == "code="){
+      warning(paste("Server returned error code:", reply, "with sequence name:", name))
+      return(NA)
+    }
+    #
+    # The sequence is the second "&"-separated field of the reply;
+    # return it as one string or as a vector of single chars.
+    #
+    sequence <- unlist(strsplit(reply, split = "&"))[2]
+    if(as.string) sequence else s2c(sequence)
+  }
+
+  #
+  # Fetch the whole sequence: from position 1 over the "length" attribute.
+  #
+  seqLength <- attr(object, "length")
+  getSequenceSocket(socket, object, start = 1, length = seqLength, as.string = as.string)
+}
+getSequence.qaw <- function(object, as.string = FALSE, ...) getSequence(object$req, as.string = as.string, ...) # qaw method: delegate to req, forwarding as.string (it used to be dropped)
+
+# Logical method: the input is handed back unchanged, so that NA is returned for virtual lists.
+getSequence.logical <- function(object, as.string = FALSE, ...) object
diff --git a/R/getTrans.R b/R/getTrans.R
new file mode 100644
index 0000000..7f03efc
--- /dev/null
+++ b/R/getTrans.R
@@ -0,0 +1,35 @@
+#
+# getTrans: S3 generic to translate sequences; dispatches on the class of object
+#
+
+getTrans <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...)
+ UseMethod("getTrans")
+
+getTrans.default <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) # fallback: collapse the class vector into one readable message
+  stop(paste("no getTrans method for objects of class:", paste(class(object), collapse = ", ")))
+
+getTrans.list <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...)
+  lapply(seq_along(object), # one translation per element, every option forwarded
+    function(k) getTrans(object[[k]], sens = sens, NAstring = NAstring, ambiguous = ambiguous, ...))
+
+getTrans.character <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = 0, numcode = 1)
+ translate(seq = object, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous) # thin wrapper: every option goes to translate(); ... is not forwarded
+
+getTrans.SeqFastadna <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = 0, numcode = 1){
+  bases <- getSequence(object, as.string = FALSE) # sequence requested with as.string = FALSE, then translated
+  translate(seq = bases, numcode = numcode, frame = frame, sens = sens, ambiguous = ambiguous, NAstring = NAstring)
+}
+getTrans.SeqFrag <- getTrans.SeqFastadna
+
+getTrans.SeqAcnucWeb <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = "auto", numcode = "auto"){
+  bases <- getSequence(object, as.string = FALSE)
+  if(frame == "auto") frame <- attr(object, "frame") # "auto": use the value stored as an attribute of the object
+  if(numcode == "auto") numcode <- attr(object, "ncbigc")
+  translate(seq = bases, numcode = numcode, frame = frame, sens = sens, ambiguous = ambiguous, NAstring = NAstring)
+}
+
+getTrans.qaw <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) getTrans(object$req, sens = sens, NAstring = NAstring, ambiguous = ambiguous, ...) # qaw method: delegate to req, forwarding every option (they used to be dropped)
+
+# Logical method: the input is handed back unchanged, so that NA is returned for virtual lists.
+getTrans.logical <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) object
+
diff --git a/R/getType.R b/R/getType.R
new file mode 100644
index 0000000..dfdb177
--- /dev/null
+++ b/R/getType.R
@@ -0,0 +1,31 @@
+###################################################################################################
+# #
+# getType #
+# #
+# #
+# To get available subsequence types in an opened ACNUC database #
+# #
+###################################################################################################
+
+getType <- function(socket = autosocket()){
+  #
+  # Get the number of records in SMJYT index file. The socket argument is
+  # forwarded so that a user-supplied connection is the one actually queried:
+  #
+  nl <- readfirstrec(socket = socket, type = "SMJ")
+
+  #
+  # Read the SMJYT index file through the same connection:
+  #
+  smj <- readsmj(socket = socket, libel.add = TRUE, sname.add = TRUE, nl = nl)
+
+  #
+  # Return the sname and libel columns of the records whose nature is "type",
+  # or NA when there is no such record:
+  #
+  istype <- !is.na(smj$nature) & smj$nature == "type"
+  if( !any(istype) ){
+    return(NA)
+  }
+  return( smj[istype, c("sname","libel")] )
+}
diff --git a/R/getlistrank.R b/R/getlistrank.R
new file mode 100644
index 0000000..8fef43f
--- /dev/null
+++ b/R/getlistrank.R
@@ -0,0 +1,47 @@
+###################################################################################################
+# #
+# getlistrank #
+# #
+# ==> getlistrank&name="xx" #
+# <== lrank=xx #
+# Returns the rank of list, or 0 if no list with name exists. #
+# #
+###################################################################################################
+
+getlistrank <- function(listname, socket = autosocket(), verbose = FALSE) {
+  # Progress messages go through this helper and are printed only when verbose is TRUE.
+  say <- function(...) if(verbose) cat(...)
+
+  #
+  # Check arguments:
+  #
+  say("I'm checking the arguments...\n")
+  isSocket <- inherits(socket, "sockconn")
+  if( !isSocket ) stop(paste("argument socket = ", socket, "is not a socket connection."))
+  say("... and everything is OK up to now.\n")
+
+  #
+  # Send request to server and read the one-line reply:
+  #
+  say("I'm sending query to server...\n")
+  writeLines(paste("getlistrank&name=\"", listname, "\"", sep = ""), socket, sep = "\n")
+  res <- readLines(socket, n = 1)
+  say(paste("... answer from server is:", res, "\n"))
+  #
+  # An empty reply is read again until the server sends something:
+  #
+  if(length(res) == 0) say("... answer from server is empty!\n")
+  while(length(res) == 0){
+    say("... reading again.\n")
+    res <- readLines(socket, n = 1)
+  }
+  #
+  # Analysing answer from server: the rank is the second "="-separated field.
+  #
+  say("I'm trying to analyse answer from server...\n")
+  fields <- unlist(strsplit(res, split = "="))
+  result <- as.numeric(fields[2])
+  return(result)
+}
+
+glr <- getlistrank
diff --git a/R/getliststate.R b/R/getliststate.R
new file mode 100644
index 0000000..bbab5f4
--- /dev/null
+++ b/R/getliststate.R
@@ -0,0 +1,38 @@
+# ==> getliststate&lrank=xx
+# <== code=xx&type=[SQ|KW|SP]&name="xx"&count=xx{&locus=[T|F]}
+# Asks for information about the list of specified rank.
+# Reply gives the type of list, its name, the number of elements it contains,
+# and, for sequence lists, says whether the list contains only parent seqs (locus=T).
+# Reply gives code != 0 if error.
+
+getliststate <- function(lrank, socket = autosocket()){
+  #
+  # Build request (sep = "" so that no blank is inserted before the rank):
+  #
+  request <- paste("getliststate&lrank=", lrank, sep = "")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Build result: the first field is the error code, "0" meaning no error.
+  #
+  resitem <- parser.socket(answerFromServer)
+  if(resitem[1] != "0"){
+    warning(paste("error code returned by server :", resitem[1]))
+    return(NA)
+  }
+  return(list(type = resitem[2],
+    name = substr(x = resitem[3], start = 2, stop = nchar(resitem[3]) - 1), # drop first and last character (the quotes)
+    count = as.numeric(resitem[4]),
+    locus = as.logical(resitem[5]))) # NA when the reply has no fifth field
+}
+
+gls <- getliststate # short alias for getliststate
+
+gln <- function(lrank, ...) getliststate(lrank, ...)$name # name component of the getliststate() result
diff --git a/R/gfrag.R b/R/gfrag.R
new file mode 100644
index 0000000..fd40082
--- /dev/null
+++ b/R/gfrag.R
@@ -0,0 +1,42 @@
+# ==> gfrag&[number=xx|name=xx]&start=xx&length=xx
+# <== length=xx&....sequence...
+# Get length characters from sequence identified by name or by number
+# starting from position start (counted from 1).
+# Reply gives the length read (may be shorter than asked for) and then the characters;
+# length can be 0 if any error.
+
+gfrag <- function(what, start, length, idby = c("name", "number"), socket = autosocket()){
+  #
+  # Default is by name:
+  #
+  idby <- idby[1]
+  if(!(idby %in% c("name", "number"))) stop("Wrong idby agument")
+  #
+  # Build request:
+  #
+  request <- paste("gfrag&", idby, "=", what, "&start=", formatC(start, format = "d"),
+    "&length=", formatC(length, format = "d"), sep = "")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Check that no error code is returned by server:
+  #
+  if(substr(x = answerFromServer, start = 1, stop = 5) == "code="){
+    warning(paste("Server returned error code:", answerFromServer))
+    return(NA)
+  }
+  #
+  # Extract sequence from server: everything after the first "&". An answer
+  # without any "&" (an empty string included) yields an empty sequence.
+  #
+  amp <- regexpr("&", answerFromServer, fixed = TRUE)[1]
+  if(amp < 0) return("")
+  return(substr(answerFromServer, start = amp + 1, stop = nchar(answerFromServer)))
+}
diff --git a/R/ghelp.R b/R/ghelp.R
new file mode 100644
index 0000000..2c46cc9
--- /dev/null
+++ b/R/ghelp.R
@@ -0,0 +1,36 @@
+# ==> ghelp&file=xx&item=xx
+# <== nl=xx&...1 or several lines...
+# Reads one item of information from specified help file.
+# File can be HELP or HELP_WIN, item is the name of the desired help item
+# Reply : nl is 0 if any problem, or announces the number of help lines returned.
+
+ghelp <- function(item = c("GENERAL", "SELECT", "SPECIES", "KEYWORD"),
+  file = c("HELP", "HELP_WIN"), socket = autosocket(),
+  catresult = TRUE){
+  #
+  # Default is "HELP" file and "GENERAL", i.e. the first choice of each argument:
+  #
+  file <- file[1]
+  item <- item[1]
+  if(!is.element(file, c("HELP", "HELP_WIN"))) stop("Wrong file agument")
+  #
+  # Send the request and read all the lines sent back by the server:
+  #
+  writeLines(paste("ghelp&file=", file, "&item=", item, sep = ""), socket, sep = "\n")
+  helpLines <- readLines(socket)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(helpLines) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # The first line starts with the "nl=xx" announcement: keep its second
+  # "&"-separated field, print the text if requested, return it invisibly.
+  #
+  helpLines[1] <- unlist(strsplit(helpLines[1], split = "&"))[2]
+  if(catresult) cat(helpLines, sep = "\n")
+  invisible(helpLines)
+}
+
diff --git a/R/isenum.R b/R/isenum.R
new file mode 100644
index 0000000..c26855e
--- /dev/null
+++ b/R/isenum.R
@@ -0,0 +1,80 @@
+# ==> isenum&[name=xx|access=xx]
+# <== number=xx{&length=xx&frame=xx&gencode=xx&ncbigc=xx}{&otheraccessmatches}
+# Finds the acnuc number of a sequence from its name (name= argument) or its accession number (access= argument).
+# The name= and access= arguments are case-insensitive.
+# Reply gives number (or 0 if does not exist), length, reading frame (0, 1, or 2), and
+# genetic code ids of the corresponding sequence (gencode= gives acnuc's genetic code, 0 means universal;
+# ncbigc= gives ncbi's genetic code id, 1 means universal).
+# When &otheraccessmatches appears in reply, it means that several sequences are attached to the given accession no.,
+# and that only the acnuc number of the first attached sequence is given in the number= argument.
+
+
+isenum <- function(what, idby = c("name", "access"), socket = autosocket()){
+  #
+  # Default is by name:
+  #
+  idby <- idby[1]
+  if(!(idby %in% c("name", "access"))) stop("Wrong idby agument")
+  #
+  # Make default return value:
+  #
+  result <- list(number = NA, length = NA, frame = NA, gencode = NA, ncbigc = NA, otheraccessmatches = NA)
+  #
+  # Build request:
+  #
+  request <- paste("isenum&", idby, "=", what, sep = "")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Build result:
+  #
+  resitem <- parser.socket(answerFromServer)
+  number <- as.numeric(resitem[1])
+  if(number == 0){
+    return(result) # sequence doesn't exist, NA returned
+  } else {
+    result$number <- number
+  }
+  result$length <- as.numeric(resitem[2])
+  #
+  # The otheraccessmatches flag has no "=value" part, so parser.socket() never
+  # returns it: look for it in the raw answer instead.
+  #
+  result$otheraccessmatches <- grepl("otheraccessmatches", answerFromServer, fixed = TRUE)
+  if( length(resitem) <= 3) return(result) # Mother sequence
+
+  result$frame <- as.numeric(resitem[3])
+  result$gencode <- as.numeric(resitem[4])
+  result$ncbigc <- as.numeric(resitem[5])
+
+  return(result)
+}
+
+isn <- function(what, ...) isenum(what, ...)$number # number component of the isenum() result
+
+#
+# getNumber.socket (deprecated as from seqinR 1.1-3): warns, then calls isn()
+#
+getNumber.socket <- function(socket, name){
+ warning("getNumber.socket is deprecated, use isn() instead")
+ isn(what = name, socket = socket)
+}
+
+#
+# getAttributsocket (deprecated as from seqinR 1.1-3)
+#
+getAttributsocket <- function( socket, name){
+  warning("getAttributsocket is deprecated, use isenum instead.")
+  # Ask the server about the sequence and parse its one-line reply.
+  writeLines(paste("isenum&name=", name, sep = ""), socket, sep = "\n")
+  fields <- parser.socket(readLines(socket, n = 1))
+  # Fields 2 to 4 of the reply are used as length, frame and gencode.
+  return( list(length = as.numeric(fields[2]), frame = as.numeric(fields[3]), gencode = as.numeric(fields[4])) )
+}
diff --git a/R/kaks.R b/R/kaks.R
new file mode 100644
index 0000000..6038f7e
--- /dev/null
+++ b/R/kaks.R
@@ -0,0 +1,81 @@
+kaks <- function(x, verbose=FALSE, debug = FALSE, forceUpperCase = TRUE){
+  #
+  # Check argument class (inherits() copes with unclassed and multi-class objects):
+  #
+  if(!inherits(x, "alignment")) stop("object x must be of class alignment")
+  if(debug){
+    cat("<--- Argument x storage is --->\n")
+    print(str(x))
+    cat("<--- Argument x storage is --->\n")
+  }
+  #
+  # Check that there are at least two sequences in the alignment:
+  #
+  if(x$nb < 2){
+    warning("there should be at least two sequences in the alignment")
+    return(NA)
+  }
+  #
+  # Check that all sequences are of the same length:
+  #
+  lseqs <- nchar(x$seq)
+  if( !all(lseqs == lseqs[1])) {
+    warning("all sequences should be the same length in an alignment")
+    return(NA)
+  }
+  #
+  # Check that the length of sequences is a multiple of 3 since we are dealing
+  # with coding sequences here:
+  #
+  if( lseqs[1] %% 3 != 0){
+    warning("sequence lengths are not a multiple of 3")
+    return(NA)
+  }
+  #
+  # Force sequences characters to upper case letters when at least
+  # one 'a', 'c', 'g', or 't' is found in the sequences:
+  #
+  if(forceUpperCase){
+    if( length(grep("[acgt]", x$seq)) != 0){
+      x$seq <- toupper(x$seq)
+    }
+  }
+  #
+  # Call internal C function:
+  #
+  l <- .Call("kaks", x$seq, x$nb, debug, PACKAGE = "seqinr")
+  if(debug){
+    cat("<--- Result l storage is --->\n")
+    print(str(l))
+    print (l)
+    cat("<--- Result l storage is --->\n")
+  }
+  #
+  # If the sequences names are missing, we call them seq1, seq2, and so on:
+  #
+  if( is.null(x$nam) ) x$nam <- paste("seq", seq_len(x$nb), sep = "")
+
+  #
+  # This is to compute the list of results: each element becomes a dist object.
+  #
+  mkresult <- function(k){
+    tmp <- matrix( k, x$nb, x$nb, byrow = TRUE, dimnames = list(x$nam, x$nam))
+    as.dist(t(tmp))
+  }
+  # verbose = TRUE: the first 13 elements of l plus checksuml = l0 + l2 + l4; otherwise the first 4 only.
+  if (verbose)
+  {
+    result <- lapply(l[seq_len(13)], mkresult)
+    check <- result[[5]] + result[[6]] + result[[7]]
+    result[[14]] <-check
+    names(result) <- c("ka", "ks", "vka", "vks","l0","l2","l4","a0","a2","a4", "b0","b2","b4","checksuml")
+    return(result)
+  }
+  else
+  {
+    result <- lapply(l[seq_len(4)], mkresult)
+    names(result) <- c("ka", "ks", "vka", "vks")
+    return(result)
+  }
+}
+
diff --git a/R/knowndbs.R b/R/knowndbs.R
new file mode 100644
index 0000000..df24737
--- /dev/null
+++ b/R/knowndbs.R
@@ -0,0 +1,48 @@
+# ==> knowndbs{&tag=xx}
+# <== nl=.. \n
+# dbname | on/off | db description \n nl such lines
+# Returns, for each database known by the server, its name (a valid value for the db= argument
+# of the acnucopen command), availability (off means temporarily unavailable), and description.
+# When the optional tag= argument is used, only databases tagged with the given string are listed;
+# without this argument, only untagged databases are listed.
+# The tag argument thus allows to identify series of special purpose (tagged) databases,
+# in addition to default (untagged) ones. The full list of untagged and tagged databases is here.
+
+knowndbs <- function(tag = c(NA, "TP", "TEST", "DEV"), socket = autosocket()){
+  #
+  # Use default tag (the first choice, NA, asks for untagged databases):
+  #
+  tag <- tag[1]
+  #
+  # Build request: the tag argument is only sent when a tag was given.
+  #
+  askforbank <- if(is.na(tag)) "knowndbs" else paste("knowndbs&tag=", tag, sep = "")
+  writeLines(askforbank, socket, sep = "\n")
+  #
+  # The first line of the reply announces the number of banks:
+  #
+  firstline <- readLines(socket, n = 1)
+  nbank <- as.numeric(parser.socket(firstline))
+  #
+  # Read bank infos from server, one line per bank:
+  #
+  res <- readLines(socket, n = nbank)
+  #
+  # Build result: placeholders first, then the "|"-separated fields, then trimmed cells.
+  #
+  placeholder <- function(label) I(rep(label, nbank))
+  resdf <- as.data.frame(list(bank = placeholder("NAbank"),
+    status = placeholder("NAstatus"),
+    info = placeholder("NAinfo")))
+  for(i in seq_len(nbank))
+    resdf[i, ] <- unlist(strsplit(res[i], split = "\\|"))[1:3]
+  for(i in seq_len(nbank))
+    for(j in seq_len(3))
+      resdf[i, j] <- trimSpace(resdf[i, j])
+  #
+  # Return result:
+  #
+  return(resdf)
+}
+
+kdb <- knowndbs
diff --git a/R/lseqinr.R b/R/lseqinr.R
new file mode 100644
index 0000000..e3cc5cd
--- /dev/null
+++ b/R/lseqinr.R
@@ -0,0 +1,7 @@
+#
+# Just a shortcut to see what's available inside seqinr:
+# lists the names found in the "package:seqinr" entry of the search path.
+#
+lseqinr <- function(){
+  ls(name = "package:seqinr")
+}
diff --git a/R/modifylist.R b/R/modifylist.R
new file mode 100644
index 0000000..3bd74ed
--- /dev/null
+++ b/R/modifylist.R
@@ -0,0 +1,90 @@
+# ==> modifylist&lrank=..&type=[length|date|scan]&operation=".."
+# <== code=0&lrank=..&name=".."&count=..{&processed=..}
+# code=3 if impossible to create a new list
+# code=2 if incorrect syntax, possibly in operation
+# lrank: (input) rank of bitlist to be modified
+# (output) rank of created bitlist containing result of modify operation
+# type: indicates what kind of modification is to be performed.
+# operation: for length, as in "> 10000" or "< 500"
+# for date, as in "> 1/jul/2001" or "< 30/AUG/98"
+# for scan, specify the string to be searched for
+# prep_getannots must be used before using modifylist&type=scan
+# the client can interrupt the scan operation by sending the escape character on the socket
+# name: name of created bitlist
+# count: number of elements in created bitlist
+# processed: only for scan operation, number of list elements scanned until completion or interruption
+
+
+modifylist <- function(listname, modlistname = listname,
+  operation, type = c("length", "date", "scan"),
+  socket = autosocket(), virtual = FALSE,
+  verbose = FALSE){
+  #
+  # Default is by length:
+  #
+  type <- type[1]
+  if(!(type %in% c("length", "date", "scan"))) stop("Wrong type agument")
+  #
+  # Build request (the list rank is looked up through the same socket):
+  #
+  request <- paste("modifylist&lrank=", glr(listname, socket = socket, verbose = verbose), "&type=", type, "&operation=\"", operation, "\"", sep = "")
+  if(verbose) cat("-->", request, "<--\n")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Build result: fields are code, lrank, name and count, in this order.
+  #
+  if(verbose) cat("Answer from server:", answerFromServer, "\n")
+  resitem <- parser.socket(answerFromServer)
+  if(resitem[1] != "0"){
+    stop(paste("error code returned by server :", resitem[1]))
+  }
+  mlrank <- as.numeric(resitem[2])
+  mlcount <- as.numeric(resitem[4])
+  #
+  # Set list name on server:
+  #
+  suppressWarnings(ressetlistname <- setlistname(lrank = mlrank, name = modlistname, socket = socket))
+  if(is.na(ressetlistname)) stop("Empty answer from server in setlistname")
+  if(ressetlistname == "4") stop(paste("No list of rank", mlrank, "exists"))
+  #
+  # Get full list informations:
+  #
+  if( !virtual ){
+    writeLines(paste("nexteltinlist&lrank=", mlrank, "&first=1&count=", mlcount, sep = ""), socket, sep = "\n")
+    res <- readLines(socket, n = mlcount, ok = FALSE)
+    if( length(res) != mlcount )
+    {
+      stop(paste("only", length(res), "list elements were send by server out of", mlcount, "expected.\n"))
+    }
+    #
+    # Extracting info
+    #
+    req <- vector(mode = "list", length = mlcount)
+    for(i in seq_len(mlcount)){
+      x <- parser.socket(res[i])
+      req[[i]] <- as.SeqAcnucWeb(substr(x[2], 2, nchar(x[2]) - 1), x[3], x[6], x[7])
+    }
+  #
+  # Virtual list case:
+  #
+  } else {
+    req <- NA
+  }
+  #
+  # Assign results in workspace (.seqinrEnv), under the name modlistname:
+  # use getliststate for typelist when avail
+  #
+  result <- list(call = match.call(), name = modlistname, nelem = mlcount, typelist = NA,
+    req = req, socket = socket)
+  class(result) <- c("qaw")
+  assign(modlistname, result, envir = .seqinrEnv)
+}
+
diff --git a/R/move.R b/R/move.R
new file mode 100644
index 0000000..585529b
--- /dev/null
+++ b/R/move.R
@@ -0,0 +1,9 @@
+move <- function(from, to){ # rename object from into to, working in the caller's frame
+ noms <- as.list(match.call()) # unevaluated call: noms$from and noms$to are the arguments as written
+ if(noms$from != noms$to){ # nothing is done when both arguments are the same
+ eval.parent(parse(text = paste(noms$from, "->", noms$to))) # assign from to to in the parent frame
+ eval.parent(parse(text = paste("rm(", noms$from, ")"))) # then remove from in the parent frame
+ }
+}
+
+mv <- move
diff --git a/R/oriloc.R b/R/oriloc.R
new file mode 100644
index 0000000..7d77289
--- /dev/null
+++ b/R/oriloc.R
@@ -0,0 +1,234 @@
+########################################################################
+# oriloc
+#
+# Prediction of replication boundaries in unannotated genomes
+#
+########################################################################
+
+oriloc <- function(
+ seq.fasta = system.file("sequences/ct.fasta.gz", package ="seqinr"), # fasta file; only its first sequence is used
+ g2.coord = system.file("sequences/ct.predict", package = "seqinr"), # CDS coordinate file, read line by line
+ glimmer.version = 3, # versions > 2 get the glimmer3 patch below
+ oldoriloc = FALSE, # TRUE: old regression code, FALSE: PCA projection
+ gbk = NULL, # optional genbank file or URL; when given it replaces seq.fasta and g2.coord
+ clean.tmp.files = TRUE, # remove the temporary files created when gbk is given
+ rot = 0) # circular shift applied to the sequence and to the CDS coordinates
+{
+ aGBKfileWasGiven <- !missing(gbk) && !is.null(gbk)
+ if(aGBKfileWasGiven) # Work directly with genbank file
+ {
+ tmpgbk <- tempfile(pattern = "orilocgbk")
+ if(substr(gbk,1,7)=="http://" || substr(gbk,1,6)=="ftp://" || substr(gbk,1,7)=="file://"){
+ download.file( gbk, destfile = tmpgbk )
+ }
+ else{
+ file.copy(from = gbk, to = tmpgbk)
+ }
+ seq.fasta <- tempfile(pattern = "orilocfasta")
+ g2.coord <- tempfile(pattern = "orilocg2")
+
+ gb2fasta( tmpgbk, seq.fasta )
+ gbk2g2( tmpgbk, g2.coord )
+ #
+ # gbk2g2 yields glimmer version 2.0 files, so force to version 2.0 in this case:
+ #
+ glimmer.version <- 2
+ }
+#
+# Get first sequence from fasta file:
+#
+ seq <- read.fasta(file = seq.fasta, set.attributes = FALSE)[[1]]
+ lseq <- length(seq)
+#
+# Read CDS coordinate file:
+#
+ g2 <- readLines( g2.coord )
+#
+# Patch for glimmer3 version:
+#
+ if( glimmer.version > 2 ){
+ # remove first line:
+ g2 <- g2[-1]
+ # remove first three characters (i.e. orf)
+ g2 <- sapply(g2, function(x) substr(x,4,nchar(x)), USE.NAMES = FALSE)
+ }
+#
+# Extract info from g2.coord file: the first three blank-separated tokens of each line
+#
+ tokens <- function( string )
+ {
+ tmp <- unlist(strsplit( string, split = " "))
+ tmp[nchar(tmp) > 0 ][seq_len(3)]
+ }
+ tmp <- sapply( g2, tokens )
+ gnum <- as.numeric(tmp[1, ]) # gene number in g2.coord
+ start <- as.numeric(tmp[2, ]) # start positions in bp
+ end <- as.numeric(tmp[3, ]) # end position in bp
+
+ if( length(start) != length(end) )
+ stop("start and end vector must be the same length")
+#
+# Rotate the genome if required
+#
+ if( rot != 0 )
+ {
+ #
+ # Circular permutation of a vector
+ #
+ rotate <- function(x, rot = 0)
+ {
+ n <- length(x)
+ rot <- rot %% n
+ if(rot == 0)
+ {
+ x
+ }
+ else
+ {
+ c(x[(rot+1):n],x[seq_len(rot)])
+ }
+ }
+ seq <- rotate(x = seq, rot = rot )
+ start <- start - rot
+ end <- end - rot
+
+ start[ start < 1 ] <- start[ start < 1 ] + lseq
+ end[ end < 1] <- end[ end < 1 ] + lseq
+
+ gnum <- gnum[order(start)]
+ end <- end[order(start)]
+
+ start <- sort(start)
+ }
+#
+# Mid gene positions and number of CDS:
+#
+ pos <- (start + end)/2000 # Mid gene position in Kb
+ ncds <- length(pos)
+#
+# CDS that wrap around the genome
+#
+ wrap <- abs(end-start) > lseq/2
+
+#
+# Compute the DNA walk gene by gene in third codon positions
+#
+ x <- integer(ncds)
+ y <- integer(ncds)
+ skew <- numeric(ncds)
+ CDS.excess <- integer(ncds)
+
+ for( i in order(pos) )
+ {
+ # Look for third codon position
+
+ if( !wrap[i] ) # regular cds that do not wrap around the genome
+ {
+ if( start[i] < end[i] ) # CDS 5'->3' direct strand
+ {
+ CDS.excess[i] <- 1
+ tcp <- seq( from = start[i] + 2, to = end[i], by = 3)
+ }
+ else # complementary strand
+ {
+ CDS.excess[i] <- -1
+ tcp <- seq( from = start[i] - 2, to = end[i], by = -3)
+ }
+ }
+ else # a cds that wraps around the genome
+ {
+ if( start[i] > end[i] ) # CDS 5'->3' direct strand
+ {
+ CDS.excess[i] <- 1
+ tcp <- seq( from = start[i] + 2, to = lseq + end[i], by = 3)
+ tcp[ tcp > lseq ] <- tcp[ tcp > lseq ] - lseq
+ }
+ else # CDS 3'->5' complementary strand
+ {
+ CDS.excess[i] <- -1
+ tcp <- seq( from = start[i] - 2, to = -(lseq-end[i]), by = -3)
+ tcp[ tcp < 1 ] <- tcp[ tcp < 1 ] + lseq
+ }
+ }
+
+ tcnucl <- seq[tcp]
+ x[i] <- length(tcnucl[tcnucl=="t"]) - length(tcnucl[tcnucl=="a"])
+ y[i] <- length(tcnucl[tcnucl=="c"]) - length(tcnucl[tcnucl=="g"])
+ }
+ x <- cumsum(x)
+ y <- cumsum(y)
+ CDS.excess <- cumsum(CDS.excess)
+#
+# Old oriloc program, direct from C without trying to vectorize.
+# To reproduce old results.
+#
+ if( oldoriloc )
+ {
+ Regression <- function(x, y, Li)
+ {
+ a <- 0 ; b <- 0 ; c <- 0;
+ for( m in seq_len(Li-1) ) # I think this should go to Li included
+ {
+ b <- b + y[m]^2
+ a <- a + x[m]^2
+ c <- c + x[m]*y[m]
+ }
+ alfa1 <- (atan(2*c/(a-b)))/2
+ alfa2 <- alfa1 - pi/2;
+ derivate1 <- 2*(a-b)*cos(2*alfa1)+4*c*sin(2*alfa1)
+
+ if(derivate1 < 0)
+ return( tan(alfa2) )
+ else
+ return( tan(alfa1) )
+ }
+
+ slope <- Regression( x, y, ncds )
+
+ for ( i in seq_len(ncds))
+ {
+ X.line <- ( y[i] + slope*x[i] )/(2*slope)
+ Y.line <- slope*X.line
+ distance <- sqrt( Y.line^2 + X.line^2 )
+
+ if( Y.line < 0 )
+ distance <- -distance
+
+ skew[i] <- distance
+ }
+ }
+ else # New oriloc program
+ {
+#
+# Project DNAwalk points (x,y) onto orthogonal regression line
+#
+ pca <- ade4::dudi.pca( cbind(x,y), scann = FALSE, nf = 1, scale = FALSE, center = FALSE )
+ rec <- ade4::reconst(pca)
+ skew <- sign(rec$x)*sqrt(rec$x^2+rec$y^2)
+ }
+#
+# Try to get a correct orientation (same as GC skew): flip skew when it is negatively correlated with y
+#
+ if( cor(skew, y ) < 0 )
+ skew <- -skew
+#
+# Build result: one row per CDS, positions in Kb
+#
+ result <- data.frame( cbind( gnum, start/1000, end/1000,
+ CDS.excess, skew, x, y) )
+ names(result) <- c("g2num", "start.kb", "end.kb", "CDS.excess",
+ "skew","x","y")
+#
+# Delete temporary files if requested:
+#
+ if(aGBKfileWasGiven && clean.tmp.files)
+ {
+ file.remove(tmpgbk)
+ file.remove(seq.fasta)
+ file.remove(g2.coord)
+ }
+#
+# the end
+#
+ return(result)
+}
diff --git a/R/parser.socket.R b/R/parser.socket.R
new file mode 100644
index 0000000..4e4d708
--- /dev/null
+++ b/R/parser.socket.R
@@ -0,0 +1,26 @@
+###############################################################################
+# #
+# parser.socket #
+# #
+# Utility function to parse answers from ACNUC server. #
+# #
+###############################################################################
+
+parser.socket <- function(onelinefromserver, verbose = FALSE)
+{
+  if(verbose) cat(paste("parser.socket received: -->", onelinefromserver, "<--\n", sep = ""))
+  #
+  # Nothing to parse: this covers character(0) as well as NULL,
+  # whose length is 0 too.
+  #
+  if(length(onelinefromserver) == 0){
+    if(verbose) cat("character(0) detected returning NULL\n")
+    return(NULL)
+  }
+  #
+  # Answer from server looks like: "code=0&lrank=2&count=150513&type=SQ&locus=F"
+  # Each match starts at an "=" and runs up to the next "=" or "&".
+  #
+  hits <- gregexpr("=[^=&]*", onelinefromserver)[[1]]
+  substring(onelinefromserver, hits + 1, hits + attr(hits, "match.length") - 1)
+}
diff --git a/R/peakabif.R b/R/peakabif.R
new file mode 100644
index 0000000..3cb675a
--- /dev/null
+++ b/R/peakabif.R
@@ -0,0 +1,68 @@
+peakabif <- function(abifdata,
+  chanel,
+  npeak,
+  thres = 400/yscale,
+  fig = TRUE,
+  chanel.names = c(1:4,105),
+  DATA = paste("DATA", chanel.names[chanel], sep = "."),
+  tmin = 1/tscale,
+  tmax = abifdata$Data[["SCAN.1"]]/tscale,
+  tscale = 1000,
+  yscale = 1000,
+  irange = (tmin*tscale):(tmax*tscale),
+  y = abifdata$Data[[DATA]][irange]/yscale,
+  method = "monoH.FC",
+  maxrfu = 1000, ...) {
+  # Everything below the threshold is treated as noise and set to zero.
+  y[y < thres] <- 0
+  heights <- surfaces <- maxis <- starts <- stops <- numeric(npeak)
+
+  # Scan the signal once: a peak is a maximal run of strictly positive values.
+  innoise <- TRUE
+  pkidx <- 1
+  for (i in seq_along(y)) {
+    if (y[i] > 0 && innoise) { # leaving noise: a peak starts here
+      starts[pkidx] <- i
+      innoise <- FALSE
+    } else if (!(y[i] > 0) && !innoise) { # back to noise: the peak stopped one point before
+      stops[pkidx] <- i - 1
+      innoise <- TRUE
+      pkidx <- pkidx + 1
+    }
+  }
+  # A peak still open at the end of the signal is closed on the last point;
+  # otherwise its stop would stay at 0 and starts:stops would run backwards.
+  if (!innoise) stops[pkidx] <- length(y)
+
+  if (fig) par(mfrow = c(4, 4), mar = c(2, 2, 0, 0) + 0.2, oma = c(0,0,2,0))
+
+  for (i in seq_len(npeak)) {
+    x <- starts[i]:stops[i]
+    if(length(x) <= 2){
+      maxis[i] <- NA
+      warning("Not all requested peaks were assigned")
+      next
+    }
+    spfun <- splinefun(x, y[x], method = method)
+    maxis[i] <- optimize(spfun, interval = range(x), maximum = TRUE)$maximum
+    heights[i] <- spfun(maxis[i])
+    surfaces[i] <- integrate(spfun, starts[i], stops[i])$value
+    if (fig) {
+      xx <- (x-1)/tscale + tmin
+      plot(xx, y[x], type = "p", las = 1, ylim = range(y), ...)
+      abline(h = thres, col = "red")
+      lines(xx, spfun(x), col = "blue")
+      abline(v = (maxis[i]-1)/tscale + tmin, col = "grey")
+    }
+  }
+  #
+  # Compute baseline:
+  #
+  baseline <- baselineabif(abifdata$Data[[DATA]][irange], maxrfu = maxrfu)
+  baseline <- baseline/yscale
+
+  if(fig) mtext(paste(deparse(substitute(abifdata)), ",",
+    DATA, ", tmin =", tmin, ", tmax =", tmax, ", thres =", thres, ", npeak =", npeak, ", yscale = ", yscale), side = 3, outer = TRUE)
+  # Invisible list: maxis on the time-index scale, heights and surfaces scaled back by yscale, baseline.
+  invisible(list(maxis = (maxis-1) + tmin*tscale, heights = yscale*heights, surfaces = yscale*surfaces, baseline = baseline))
+}
diff --git a/R/permutation.R b/R/permutation.R
new file mode 100644
index 0000000..49cfe91
--- /dev/null
+++ b/R/permutation.R
@@ -0,0 +1,44 @@
+"permutation" <-
+function (sequence, modele = "base", frame = 0, replace = FALSE,
+ prot = FALSE, numcode = 1, ucoweight = NULL)
+{
+ if (modele == "base") { #model 1: shuffle all bases together
+ new <- sample(sequence, replace = replace)
+ }
+ else if (modele == "position") { #model 2: shuffle bases within each codon position
+ CDSseq <- sequence[(frame + 1):length(sequence)]
+ if (length(CDSseq)%%3 != 0) {
+ stop("sequence could not be subdivided in codons")
+ }
+ else {
+ seqI <- CDSseq[seq(1, length(CDSseq), by = 3)]
+ seqII <- CDSseq[seq(2, length(CDSseq), by = 3)]
+ seqIII <- CDSseq[seq(3, length(CDSseq), by = 3)]
+ newCDSseq <- as.vector(t(cbind(sample(seqI, replace = replace),
+ sample(seqII, replace = replace), sample(seqIII,
+ replace = replace))))
+ }
+ new <- c(sequence[0:frame], newCDSseq)
+ }
+ else if (modele == "codon") { #model 3: shuffle all codons together
+ split <- splitseq(sequence, frame) #split in codons (frame=starting position of CDS)
+ if (prot == FALSE) {
+ new <- c(sequence[0:frame], s2c(c2s(sample(split,
+ replace = replace))), tail(sequence, (length(sequence) -
+ frame)%%3))
+ }
+ else { #"prot==TRUE" = keep the first and last codons in place (Met and STOP)
+ l <- length(split)
+ new <- c(sequence[0:frame], s2c(c2s(c(split[1], sample(split[2:(l -
+ 1)], replace = replace), split[l]))), tail(sequence,
+ (length(sequence) - frame)%%3))
+ }
+ }
+ else if (modele == "syncodon") { #model 4: shuffle/replace synonymous codons
+ CDSseq <- sequence[(frame + 1):length(sequence)]
+ newCDSseq <- synsequence(CDSseq, numcode = numcode, ucoweight = ucoweight)
+ new <- c(sequence[0:frame], newCDSseq, tail(sequence,
+ (length(sequence) - frame)%%3))
+ }
+ return(new)
+}
diff --git a/R/plot.SeqAcnucWeb.R b/R/plot.SeqAcnucWeb.R
new file mode 100644
index 0000000..b50ca2c
--- /dev/null
+++ b/R/plot.SeqAcnucWeb.R
@@ -0,0 +1,110 @@
+################################################################################
+#
+# plot.SeqAcnucWeb
+#
+################################################################################
+
+# Plot method for objects of class SeqAcnucWeb: draws, on the scale of the
+# parent sequence of x, one rectangle per subsequence found on that parent,
+# with one row and one colour per subsequence type.
+#
+# Arguments:
+#   x       an object inheriting from class "SeqAcnucWeb" (checked below).
+#   types   subsequence types to look for, default getType()$sname.
+#   socket  connection passed to query().
+#   ...     not used in the body.
+#
+# Value: returned invisibly, a list named by the types actually found; each
+# element holds, per subsequence, the unlisted result of getLocation().
+#
+# Side effects: graphical parameters are saved and restored on exit; the
+# lists "me", "filles" and "tmp" are looked up in .seqinrEnv after the calls
+# to query() and removed from it at the end.
+plot.SeqAcnucWeb <- function(x, types = getType()$sname, socket = autosocket(), ...){
+ verbose <- FALSE # turn this into an argument if needed
+ #
+ # Check arguments:
+ #
+ if(!inherits(x, "SeqAcnucWeb")) stop("Sequence of class SeqAcnucWeb is needed")
+
+ #
+ # Save graphical parameters:
+ #
+ old.par <- par(no.readonly = TRUE)
+ on.exit(par(old.par))
+
+ if(verbose) cat(paste("types:", types, sep = "\n"))
+
+ #
+ # Get the parent sequence:
+ #
+
+ GiveMeTheMotherSequence <- paste("me n=", x, sep = "")
+ query(listname = "me", query = GiveMeTheMotherSequence, socket = socket)
+ # Only the first element of the resulting list is used as the parent.
+ MotherLength <- as.numeric(getLength(get("me", .seqinrEnv)$req[[1]]))
+ MotherName <- get("me", .seqinrEnv)$req[[1]]
+ if(verbose) cat("\nMotherLength = ", MotherLength)
+
+ #
+ # Plot organization:
+ #
+ par(mar = c(2.1, 0.1, 4.1, 0.1), lend = "square", ljoin = "mitre")
+ cx <- c(0, MotherLength)
+ cy <- c(0, 1)
+ # Empty plot (type = "n") that only sets up the coordinate system.
+ plot(cx, cy, ann = FALSE, type = "n", axes = FALSE)
+ axis(1)
+ title(main = paste("Physical position of subsequences on the parent sequence",
+ MotherName, "(", MotherLength, "bp )", sep=" "))
+
+ #
+ # Look for subsequences:
+ #
+
+ GiveMeAllSubsequences <- paste("fi n=", MotherName, sep = "")
+ query(listname = "filles", query = GiveMeAllSubsequences, socket = socket)
+
+ n <- length(types) # number of potential subsequences types
+ ispresent <- rep(FALSE, n) # will be TRUE if one or more subsequence of this type is found
+ nb <- numeric(n) # count of subsequences for available types
+ posi <- vector(mode = "list", length = n) # position of subsequences
+
+ for(i in seq_len(n)){
+ # Restrict the list "filles" to the subsequences of type types[i].
+ q <- paste("filles et t=", types[i], sep = "")
+ if(verbose) cat("query = ", q, "\n")
+
+ # A type is skipped when the query fails, returns nothing, returns NA,
+ # or returns x itself as first element.
+ result <- try(query(socket = socket, listname = "tmp", query = q))
+ if( inherits(result, "try-error")) next
+ if(get("tmp", .seqinrEnv)$nelem == 0) next
+ if(is.na(get("tmp", .seqinrEnv)$req[[1]])) next
+ if(get("tmp", .seqinrEnv)$req[[1]] == x ) next
+
+ ispresent[i] <- TRUE
+
+ u <- lapply(get("tmp", .seqinrEnv)$req, getLocation)
+ names(u) <- get("tmp", .seqinrEnv)$req
+ nb[i] <- length(u)
+ posi[[i]] <- u
+ }
+
+ #
+ # Draw subsequences:
+ #
+ # Keep only the types for which something was found; n is redefined accordingly.
+ posi <- posi[ispresent]
+ nb <- nb[ispresent]
+ types <- types[ispresent]
+ n <- length(types)
+ for(i in seq_len(n)){
+ for(j in seq_len(length(posi[[i]]))){
+ # Type i is drawn in the horizontal band [(i-1)/(n+1), i/(n+1)];
+ # the top band is left free (the legend is placed at "top").
+ xleft <- posi[[i]][[j]][1]
+ ybottom <- (i - 1)/(n + 1)
+ xright <- posi[[i]][[j]][2]
+ ytop <- i/(n + 1)
+ rect(xleft, ybottom, xright, ytop, col = i, border = "black", lend = "square", ljoin = "mitre" )
+ }
+ }
+
+ #
+ # Draw legend:
+ #
+ legend("top", legend = paste(types, "(", nb, ")", sep = ""), fill = seq_len(n), horiz = TRUE, bty = "n")
+
+ resu <- lapply(posi,function(x){lapply(x,unlist)})
+ names(resu) <- types
+
+ #
+ # workspace cleanup
+ #
+ # NOTE(review): this cleanup is not reached when an error occurs above.
+
+ rm("me", pos = .seqinrEnv)
+ rm("filles", pos = .seqinrEnv)
+ rm("tmp", pos = .seqinrEnv)
+
+ #
+ # Return invisibly the result:
+ #
+ invisible(resu)
+}
diff --git a/R/plotPanels.R b/R/plotPanels.R
new file mode 100644
index 0000000..7c5f3cd
--- /dev/null
+++ b/R/plotPanels.R
@@ -0,0 +1,22 @@
+# Plot the amplicon size ranges of the markers of one kit, with one row per
+# dye colour.
+#
+# Arguments:
+#   kitname  name of the element of 'data' to plot; also used as plot title.
+#   data     list whose element 'kitname' has the columns marker, dye.col,
+#            min.bp and max.bp.
+#   xlim     x-axis limits; default is c(min(min.bp), max(max.bp)).
+#   cex      character expansion for the marker names.
+#   alpha    passed to col2alpha() for the fill colour of the rectangles.
+#
+# Called for its side effect (a new plot).
+plotPanels <- function(kitname, data, xlim = NULL, cex = 0.75, alpha = 0.5){
+  df <- data[[kitname]]
+  if(is.null(df)) stop(paste("no kit named", kitname, "in data"))
+  df$marker <- as.character(df$marker)
+  df[df$marker == "AMEL", "marker"] <- "A"
+  dcoln <- unique(as.character(df$dye.col))
+  ncol <- length(dcoln)
+  # The data range is only needed when the caller did not supply xlim.
+  if(is.null(xlim)) xlim <- c(min(df$min.bp), max(df$max.bp))
+  plot.new()
+  plot.window(xlim = xlim, ylim = c(0, ncol))
+  # Row of each dye colour: the first colour met is drawn at the top.
+  yscale <- (ncol-1):0
+  names(yscale) <- dcoln
+  # seq_len() so that a panel without marker draws nothing, where 1:nrow(df)
+  # would iterate over c(1, 0).
+  for(i in seq_len(nrow(df))){
+    col <- as.character(df[i, "dye.col"])
+    colalpha <- col2alpha(col, alpha)
+    rect(df[i, "min.bp"], yscale[col] + 0.25, df[i, "max.bp"], yscale[col] + 0.5, col = colalpha)
+    text(df[i, "min.bp"], yscale[col]+0.75, df[i, "marker"], pos = 4, cex = cex)
+  }
+  title(main = kitname, xlab = "Amplicon Size Ranges [bp]")
+  axis(1)
+}
diff --git a/R/plotabif.R b/R/plotabif.R
new file mode 100644
index 0000000..f0cedb7
--- /dev/null
+++ b/R/plotabif.R
@@ -0,0 +1,54 @@
+# Plot one channel of an ABIF data set, either against time or, when a
+# calibration function is supplied, against size in bp.
+#
+# Most arguments have defaults written as expressions of other arguments
+# (e.g. tmin uses tscale, y uses DATA, irange and yscale). R evaluates a
+# default only when the argument is first used, so the order in which the
+# body touches them matters.
+#
+# Value: returned invisibly, the graphical parameters in effect at the end of
+# the function (par(no.readonly = TRUE)); the parameters found on entry are
+# restored on exit.
+plotabif <- function(abifdata,
+ chanel = 1,
+ tmin = 1/tscale,
+ tmax = abifdata$Data[["SCAN.1"]]/tscale,
+ tscale = 1000,
+ yscale = 1000, type = "l", las = 1,
+ xlab = paste("Time", tscale, sep = "/"),
+ ylab = paste("RFU", yscale, sep = "/"),
+ irange = (tmin*tscale):(tmax*tscale),
+ x = irange/tscale,
+ xlim = c(tmin, tmax),
+ chanel.names = c(1:4,105),
+ DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ y = abifdata$Data[[DATA]][irange]/yscale,
+ ylim = c(min(y), max(y)),
+ dyn = abifdata$Data[[paste("DyeN", chanel, sep = ".")]],
+ main = paste(deparse(substitute(abifdata)), chanel, dyn, sep = " ; "),
+ calibr = NULL,
+ ladder.bp = NULL,
+ allele.names = "identifiler",
+ ladder.lab = TRUE,
+ ...){
+
+ old.par <- par(no.readonly = TRUE)
+ on.exit(par(old.par))
+ if(is.null(calibr)){
+ # No calibration: plot against x (time by default).
+ plot(x, y, type = type, las = las,
+ xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, main = main, ...)
+ } else {
+ # With a calibration function the x and xlim arguments are overridden by
+ # the calibrated values of irange.
+ x <- calibr(irange)
+ xlim <- range(x)
+ plot(x, y, type = type, las = las,
+ xlab = "Size [bp]", ylab = ylab, xlim = xlim, ylim = ylim, main = main, ...)
+ # Secondary x axis: pretty() values of irange, placed at their calibrated
+ # positions and labelled divided by tscale.
+ tps <- pretty(irange)
+ par(cex=0.5)
+ axis(1, at = calibr(tps), tps/tscale, line = 0.4, col = grey(0.5))
+ # NOTE(review): cex is set back to 1, not to the value it had before.
+ par(cex=1)
+ if(!is.null(ladder.bp)){ # Allelic ladder add
+ # Load the data set named by allele.names into this function's
+ # environment and keep the entry for the requested channel.
+ data(list = allele.names,envir=environment())
+ tmp <- get(allele.names)[chanel]
+ n <- length(ladder.bp)
+ labels <- unlist(tmp)
+ # Labels containing a dot are drawn in red, the others in black.
+ col <- rep("black", n)
+ col[grep("\\.", labels)] <- "red"
+ abline(v = ladder.bp, col = col)
+ if(ladder.lab){
+ # Labels are written above the top of the plot region (xpd = NA).
+ text(ladder.bp, y = par("usr")[4], labels, xpd = NA,
+ pos = 3, srt = 45, col = col, cex = 0.8)
+ }
+ }
+ }
+ locpar <- par(no.readonly = TRUE)
+ invisible(locpar)
+}
diff --git a/R/plotladder.R b/R/plotladder.R
new file mode 100644
index 0000000..5bd598d
--- /dev/null
+++ b/R/plotladder.R
@@ -0,0 +1,35 @@
+# Plot the observed allelic ladder of one channel of an ABIF data set.
+#
+# Arguments:
+#   abifdata      list with a Data component (DyeN.<chanel> and RunN.1 are read).
+#   chanel        channel to plot; also used to index the allele names data set.
+#   calibr        function applied to the peak positions returned by peakabif().
+#   allele.names  name of the data set holding the allele names.
+#   npeak         number of peaks asked to peakabif(); defaults to the number
+#                 of allele names for this channel.
+#   ...           passed to peakabif().
+#
+# Value: returned invisibly, the calibrated peak positions.
+# Graphical parameters are restored on exit.
+plotladder <- function(abifdata, chanel, calibr, allele.names = "identifiler", npeak = NULL, ...){
+  old.par <- par(no.readonly = TRUE)
+  on.exit(par(old.par))
+
+  # Load the allele names into this function's environment.
+  data(list = allele.names,envir=environment())
+  tmp <- get(allele.names)[chanel]
+
+  if(is.null(npeak)) npeak <- length(unlist(tmp))
+  x <- calibr(peakabif(abifdata, chanel, npeak = npeak, ...)$maxis)
+  n <- length(x)
+
+  par(mfrow = c(1,1), mar = c(5,0,4,0)+0.1)
+
+  # Alleles whose name contains a dot are drawn in red and slightly higher.
+  labels <- unlist(tmp)
+  col <- rep("black", n)
+  col[grep("\\.", labels)] <- "red"
+  y <- rep(1.1, n)
+  y[grep("\\.", labels)] <- 1.3
+
+  dyn <- abifdata$Data[[paste("DyeN", chanel, sep = ".")]]
+  main <- paste(abifdata$Data[["RunN.1"]], "\nwith fluorochrome", dyn)
+  main <- paste("Observed allelic ladder for", main)
+
+  plot(x, y, type = "h", ylim = c(0,1.5), col = col, yaxt = "n", ylab = "",
+    xlab = "Observed size [bp]", main = main)
+  text(x, y + 0.1, labels, srt = 90, col = col)
+
+  nlocus <- unlist(lapply(tmp, length))
+  nallploc <- unlist(lapply(tmp,function(x) sapply(x,length)))
+  # Index of the first allele of each locus: 1, then one past the end of each
+  # locus but the last. head(, -1) is empty for a single locus, where
+  # 1:(nlocus-1) would have been c(1, 0) and produced a spurious second start.
+  loc.pos <- c(1, head(cumsum(nallploc), -1) + 1)
+  locnames <- unlist(lapply(tmp, names))
+  rect(x[loc.pos], rep(0.4, nlocus), x[cumsum(nallploc)], rep(0.6, nlocus), col = "lightblue")
+  text(x[loc.pos], 0.5, locnames, pos = 4)
+  invisible(x)
+}
diff --git a/R/pmw.R b/R/pmw.R
new file mode 100644
index 0000000..1cbff9b
--- /dev/null
+++ b/R/pmw.R
@@ -0,0 +1 @@
+pmw <- function(seqaa, Ar = c(C = 12.0107, H = 1.00794, O = 15.9994,
N = 14.0067, P = 30.973762, S = 32.065), gravity = 9.81,
unit = "gram", checkseqaa = TRUE){
  #
  # Protein molecular weight from the amino-acid composition of seqaa.
  # Ar gives the relative atomic masses of C, H, O, N, P and S; unit and
  # gravity select the scaling applied to each molar weight below.
  #
  # Argument validation:
  #
  if( !(unit %in% c("gram", "N")) ) stop("Non allowed unit argument")
  if( any(Ar < 0)) stop("Negative Ar not allowed")
  if( gravity < 0 ) stop("Antigravity not allowed")
  if( nchar(seqaa[1]) > 1) stop("seqaa should be a vector of single chars")
  residues <- s2c("*ACDEFGHIKLMNPQRSTVWY")
  if(checkseqaa){
    if( !all(seqaa %in% residues) ) warning("Non allowed characters in seqaa")
  }
  #
  # Amino-acid counts ("*" is counted but never weighted):
  #
  comp <- table(factor(seqaa, levels = residues))
  nres <- sum(comp[names(comp) != "*"])
  if( nres == 0) stop("Zero length protein not allowed")
  #
  # Molecular weight of one mol of a compound given its numbers of
  # C, H, O, N, P and S atoms (in that order):
  #
  molar.weight <- function(atoms){
    total <- atoms[1]*Ar["C"] + atoms[2]*Ar["H"] + atoms[3]*Ar["O"] +
      atoms[4]*Ar["N"] + atoms[5]*Ar["P"] + atoms[6]*Ar["S"]
    if(unit == "gram") total*gravity/9.81 else total*gravity/1000
  }
  #
  # Atomic composition (C, H, O, N, P, S) of the free amino-acids:
  #
  composition <- list(
    A = c(3,7,2,1,0,0),  C = c(3,7,2,1,0,1),  D = c(4,7,4,1,0,0),
    E = c(5,9,4,1,0,0),  F = c(9,11,2,1,0,0), G = c(2,5,2,1,0,0),
    H = c(6,9,2,3,0,0),  I = c(6,13,2,1,0,0), K = c(6,14,2,2,0,0),
    L = c(6,13,2,1,0,0), M = c(5,11,2,1,0,1), N = c(4,8,3,2,0,0),
    P = c(5,9,2,1,0,0),  Q = c(5,10,3,2,0,0), R = c(6,14,2,4,0,0),
    S = c(3,7,3,1,0,0),  T = c(4,9,3,1,0,0),  V = c(5,11,2,1,0,0),
    W = c(11,12,2,2,0,0), Y = c(9,11,3,1,0,0))
  #
  # Sum of count * molar weight, accumulated in the order of the table above:
  #
  mw <- NULL
  for(aa in names(composition)){
    contribution <- comp[aa]*molar.weight(composition[[aa]])
    mw <- if(is.null(mw)) contribution else mw + contribution
  }
  #
  # Remove n - 1 water molecules (one per peptide bond):
  #
  mw <- mw - (nres - 1)*molar.weight(c(0,2,1,0,0,0))
  names(mw) <- NULL
  return(mw)
}
\ No newline at end of file
diff --git a/R/prepgetannots.R b/R/prepgetannots.R
new file mode 100644
index 0000000..0df33b6
--- /dev/null
+++ b/R/prepgetannots.R
@@ -0,0 +1,81 @@
+# ==> prep_getannots&nl=xx
+# key_name{|subkey_name} \n nl such lines sent to server
+# .... \n
+# <== code=0 \n
+# This command must be used before using the getannots or the modifylist&type=scan commands to specify
+# what sorts of annotation records will be returned by the getannots command or will be scanned.
+# nl: announces the number of key names that follow.
+# key_name: an annotation key name.
+# subkey_name: optionally, an annotation sub-item name (e.g., CDS when key_name = FT)
+# For the EMBL/SWISSPROT format, keys are: ALL, AC, DT, KW, OS, OC, OG, OH,
+# RN, RC, RP, RX, RA, RG, RT, RL, DR, AH, AS, CC, FH, FT, SQ, SEQ.
+# For GenBank: ALL, ACCESSION, VERSION, KEYWORDS, SOURCE, ORGANISM, REFERENCE, AUTHORS, CONSRTM,
+# TITLE, JOURNAL, PUBMED, REMARK, COMMENT, FEATURES, ORIGIN, SEQUENCE. Names of annotation subitems
+# (e.g., JOURNAL) must be entered with their 2 leading space characters.
+# For FT (EMBL, SWISSPROT) and FEATURES (GenBank), one or more specific feature keys can be
+# specified using uppercase-only lines such as
+# FEATURES|CDS
+# FT|TRNA
+# Keys ALL and SEQ/SEQUENCE stand for all annotation lines and all sequence lines, respectively.
+# For the scan operation, key ALL stands for the DE/DEFINITION lines,
+# and SEQ/SEQUENCE cannot be used (annotations, but not sequences, are scanned).
+
+
+# Send a prep_getannots request to the server to select the annotation lines
+# handled by later getannots or scan commands.
+#
+# Arguments:
+#   what     "all", or a character vector of annotation keys (converted to
+#            upper case before being sent).
+#   setfor   "scan" (default, first element) or "getannots".
+#   socket   connection to the server.
+#   verbose  when TRUE, trace what is done.
+#
+# Value: returned invisibly, the vector of keys sent to the server, or NA
+# (with a warning) when the server sent no answer. A non-zero return code
+# from the server is an error.
+prepgetannots <- function(what = "all",
+  setfor = c("scan", "getannots"),
+  socket = autosocket(), verbose = FALSE){
+  #
+  # Default is to set for scan :
+  #
+  setfor <- setfor[1]
+  if(!(setfor %in% c("scan", "getannots"))) stop("Wrong setfor argument")
+  if(verbose) cat(paste("setfor is", setfor, "\n"))
+  #
+  # Get annotation lines names for current database:
+  #
+  annotlines <- cfl(socket = socket)$annotlines
+  if(verbose) cat("annotlines:\n", annotlines, "\n")
+  annotlines <- toupper(annotlines)
+  #
+  # Building the list of annotation lines:
+  #
+  # 'what' may be a vector of several keys: test its length first so that
+  # if() always receives a single logical value.
+  if(length(what) == 1 && what == "all"){
+    if(verbose) cat("what == all, turning all to true\n")
+    if(setfor == "scan"){
+      # For scan all but the first ("ALL") and the last ("SEQ*")
+      todo <- annotlines[2:(length(annotlines) - 1)]
+    } else {
+      # For getannots all but the first ("ALL")
+      todo <- annotlines[2:(length(annotlines))]
+    }
+  } else {
+    if(verbose) cat("what != all, user supplied list of annotation lines\n")
+    todo <- toupper(what)
+  }
+  #
+  # Sending request to server:
+  #
+  if(verbose) cat("todo:\n", todo, "\n")
+  # One header line announcing the number of keys, then one key per line.
+  request <- paste(paste("prep_getannots&nl=", length(todo), "\n", sep = ""),
+    paste(todo, collapse = "\n"), sep = "")
+  if(verbose) cat("Sending:\n", request)
+  writeLines(request, socket)
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  if(verbose) cat("... answer from server is:", answerFromServer, "\n")
+  resitem <- parser.socket(answerFromServer)
+  if(resitem[1] != "0"){
+    stop(paste("error code returned by server :", resitem[1]))
+  }
+  if(verbose) cat("... everything is OK up to now\n")
+  invisible(todo)
+}
+
+# Short alias.
+pga <- prepgetannots
+
diff --git a/R/prettyseq.R b/R/prettyseq.R
new file mode 100644
index 0000000..50ba96c
--- /dev/null
+++ b/R/prettyseq.R
@@ -0,0 +1,47 @@
+#==> prettyseq&num=xx{&bpl=xx}{&translate=[T|F]}
+#<== code=0\n
+#line1
+#line2
+#...
+#prettyseq END.\n
+#To get a text representation of sequence of rank num and of its subsequences,
+#with bpl bases per line (default = 60), and with optional translation of
+#protein-coding subsequences.
+
+# Ask the server for the text representation of the sequence of rank 'num'
+# and print it.
+#
+# Arguments:
+#   num        rank of the sequence (formatted as an integer in the request).
+#   bpl        number of bases per line.
+#   translate  whether translation is requested ("T" or "F" in the request).
+#   socket     connection to the server.
+#
+# Value: returned invisibly, the lines received (without the end marker), or
+# NA with a warning when the answer is empty, is an error code, or does not
+# end with the "prettyseq END." marker.
+prettyseq <- function(num, bpl = 60, translate = TRUE, socket = autosocket()){
+  #
+  # Build request:
+  #
+  request <- paste("prettyseq&num=", formatC(num, format = "d"),
+    "&bpl=", bpl, "&translate=", ifelse(translate,"T", "F"), sep = "")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket, n = 1)
+  #
+  # Check that there is an answer from server:
+  #
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Check that no error code is returned by server:
+  # (the code is the 6th character of the "code=0" answer)
+  #
+  if( substr(x = answerFromServer, start = 6, stop = 6) != "0"){
+    warning(paste("Server returned error code:", answerFromServer))
+    return(NA)
+  }
+  #
+  # Extract sequence from server:
+  #
+  result <- readLines(socket)
+  #
+  # Check the end. Nothing read at all is also an incomplete answer: without
+  # the length test, result[0] != "..." is logical(0) and if() fails.
+  #
+  if( length(result) == 0 || result[length(result)] != "prettyseq END."){
+    warning("Incomplete answer from server")
+    return(NA)
+  }
+  result <- result[-length(result)] # remove last line
+  cat(result, sep = "\n")
+  invisible(result)
+}
diff --git a/R/query.r b/R/query.r
new file mode 100644
index 0000000..5721bc7
--- /dev/null
+++ b/R/query.r
@@ -0,0 +1,137 @@
+###############################################################################
+# #
+# query #
+# #
+###############################################################################
+
+# Send a query to the server and store the resulting list, under the name
+# 'listname', in the environment .seqinrEnv.
+#
+# Arguments:
+#   listname   name of the list; when 'query' is missing, this argument is
+#              taken as the query and the list is named "list1".
+#   query      the query string.
+#   socket     a connection of class "sockconn", open for reading and writing.
+#   invisible  must be TRUE or FALSE; it is only validated in this function.
+#   verbose    when TRUE, trace every step.
+#   virtual    when TRUE, the elements of the list are not retrieved and the
+#              req component is NA.
+#
+# The stored object has class "qaw" and components call, name, nelem,
+# typelist, req and socket. Errors are raised for invalid arguments, a closed
+# socket, no answer after the retries, a non-zero code from the server, or a
+# number of received elements different from the one announced.
+query <- function(listname, query, socket = autosocket(), invisible = TRUE, verbose = FALSE, virtual = FALSE)
+{
+ #
+ # Use list1 as listname if the argument is missing:
+ #
+ if(missing(query)){
+ query <- listname
+ listname <- "list1"
+ }
+ #
+ # Check arguments:
+ #
+ if(verbose) cat("I'm checking the arguments...\n")
+
+ if( !inherits(socket, "sockconn") ) stop(paste("argument socket = ", socket, "is not a socket connection."))
+ if( !is.character(listname) ) stop(paste("argument listname = ", listname, "is not a character string."))
+ if( !is.character(query) ) stop(paste("argument query = ", query, "is not a character string."))
+ if( !is.logical(invisible) ) stop(paste("argument invisible = ", invisible, "should be TRUE or FALSE."))
+ if( is.na(invisible) ) stop(paste("argument invisible = ", invisible, "should be TRUE or FALSE."))
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Check the status of the socket connection:
+ #
+ if(verbose) cat("I'm checking the status of the socket connection...\n")
+ #
+ # This does not work: summary.connection raises an exception and we go no further
+ #
+ if(!isOpen(socket)) stop(paste("socket:", socket, "is not opened."))
+ if(!isOpen(socket, rw = "read")) stop(paste("socket:", socket, "can not read."))
+ if(!isOpen(socket, rw = "write")) stop(paste("socket:", socket, "can not write."))
+ if(verbose) cat("... and everything is OK up to now.\n")
+
+ #
+ # Send request to server:
+ #
+ if(verbose) cat("I'm sending query to server...\n")
+ # Query and list name are both sent between double quotes.
+ request <- paste("proc_requete&query=\"", query, "\"&name=\"", listname, "\"", sep = "")
+ writeLines(request, socket, sep = "\n")
+ res <- readLines(socket, n = 1)
+ #
+ # This is where there is a timeout problem. An ugly patch follows
+ #
+
+ if(verbose) cat(paste("... answer from server is:", res, "\n"))
+ if(length(res) == 0){
+ if(verbose) cat("... answer from server is empty!\n")
+ # Change by Simon following the e-mail from Augusto Ribas
+ # Read again, up to maxIter times, until a line is obtained.
+ maxIter <- 10
+ attemptNumber <- 0
+ while((length(res) == 0) & (attemptNumber < maxIter)) {
+ if(verbose) cat("... reading again (",attemptNumber,").\n")
+ res <- readLines(socket, n = 1)
+ attemptNumber <- attemptNumber+1
+ }
+ if(length(res) == 0){
+ stop(paste("Unable to get any answer from socket after ",attemptNumber , " trials."))
+ }
+ }
+ #
+ # Analysing answer from server:
+ #
+ if(verbose) cat("I'm trying to analyse answer from server...\n")
+ # Fields of the parsed answer as used below: [1] return code, [2] rank of
+ # the list (or error text), [3] number of elements, [4] type of the
+ # elements, [5] "T" when the list holds parent sequences only.
+ p <- parser.socket(res)
+ if(p[1] != "0"){
+ if(verbose) cat("... and I was able to detect an error.\n")
+ stop(paste("invalid request:", p[2], sep = ""))
+ }
+
+ if(verbose) cat("... and everything is OK up to now.\n")
+ lrank <- p[2]
+ if(verbose) cat(paste("... and the rank of the resulting list is:", lrank, ".\n"))
+ nelem <- as.integer(p[3])
+ if(verbose) cat(paste("... and there are", nelem, "elements in the list.\n"))
+ typelist <- p[4]
+ if(verbose) cat(paste("... and the elements in the list are of type", typelist, ".\n"))
+ if(typelist == "SQ"){
+ if(p[5] == "T"){
+ if(verbose) cat("... and there are only parent sequences in the list.\n")
+ } else {
+ if(verbose) cat("... and there are *not* only parent sequences in the list.\n")
+ }
+ }
+
+ #
+ # Get full list informations:
+ #
+ if( !virtual ){
+ if(verbose) cat("I'm trying to get the infos about the elements of the list...\n")
+ # Ask for all nelem elements at once, starting from the first one.
+ writeLines(paste("nexteltinlist&lrank=", lrank, "&first=1&count=", nelem, sep = ""), socket, sep = "\n")
+ res <- readLines(socket, n = nelem, ok = FALSE)
+ if( length(res) != nelem )
+ {
+ if(verbose) cat("... and I was able to detect an error...\n")
+ stop(paste("only", length(res), "list elements were send by server out of", nelem, "expected.\n"))
+ } else {
+ if(verbose) cat(paste("... and I have received", nelem, "lines as expected.\n"))
+ }
+ #
+ # Extracting info
+ #
+ req <- vector(mode = "list", length = nelem)
+ for(i in seq_len(nelem)){
+ x <- parser.socket(res[i])
+ # The first and last characters of field 2 are dropped before use
+ # (presumably surrounding quotes -- TODO confirm).
+ req[[i]] <- as.SeqAcnucWeb(substr(x[2], 2, nchar(x[2]) - 1), x[3], x[6], x[7])
+ }
+ #
+ # Virtual list case:
+ #
+ } else {
+ if(verbose) cat("I'am *not* trying the infos about the elements of the list since virtual is TRUE.\n")
+ req <- NA
+ }
+ #
+ # Assign results in user workspace:
+ #
+ result <- list(call = match.call(), name = listname, nelem = nelem, typelist = typelist,
+ req = req, socket = socket)
+ class(result) <- c("qaw")
+ # assign() is the last expression: its value is what the function returns.
+ assign(listname, result, envir = .seqinrEnv)
+}
+
+#
+# Print method:
+#
+
+# Print method for objects of class "qaw": one line giving the number of
+# elements, their type and the query that produced the list.
+# Returns x invisibly, as print methods usually do.
+print.qaw <- function(x, ...)
+{
+  # When the call was made with a single argument, the query was recorded
+  # under the name 'listname'.
+  what <- x$call$query
+  if(is.null(what)) what <- x$call$listname
+  # The recorded argument may be a symbol or a call rather than a string;
+  # cat() cannot print a call, so turn it into text first.
+  if(!is.null(what) && !is.character(what)) what <- paste(deparse(what), collapse = " ")
+  # Full component name: x$type only worked through partial matching of '$'.
+  cat(x$nelem, x$typelist, "for", what)
+  cat("\n")
+  invisible(x)
+}
diff --git a/R/quitacnuc.R b/R/quitacnuc.R
new file mode 100644
index 0000000..2a373e1
--- /dev/null
+++ b/R/quitacnuc.R
@@ -0,0 +1,12 @@
+# ==> quit
+# <== OK acnuc socket stopped
+# To close the socket and stop communication over it.
+# (The answer line above is the exact string checked by the function.)
+
+# Send the quit command over 'socket'. Any answer other than the expected
+# one, including no answer at all, is an error.
+quitacnuc <- function(socket){
+  writeLines("quit", socket, sep = "\n")
+  answer <- readLines(socket, n = 1)
+  # readLines() returns character(0) when nothing could be read; test the
+  # length first so that if() always receives a single logical value.
+  if(length(answer) == 0 || answer != "OK acnuc socket stopped"){
+    stop("I do not understand answer for quitacnuc from server, please contact package maintainer.\n")
+  }
+}
+
diff --git a/R/read.abif.R b/R/read.abif.R
new file mode 100644
index 0000000..76845ba
--- /dev/null
+++ b/R/read.abif.R
@@ -0,0 +1,156 @@
+# Read a file in ABIF format.
+#
+# Arguments:
+#   filename           path of the file to read.
+#   max.bytes.in.file  number of bytes expected in the file (default: its
+#                      size on disk).
+#   pied.de.pilote     safety factor applied to max.bytes.in.file when reading.
+#   verbose            when TRUE, print what is found while parsing.
+#
+# Value: a list with components
+#   Header     the fields decoded from the 128-byte header;
+#   Directory  a data.frame with one row per directory entry;
+#   Data       a list with one element per directory entry, named
+#              "<name>.<tagnumber>"; entries of a type that is not decoded
+#              stay NULL.
+# It is an error if the file does not start with "ABIF".
+read.abif <- function(filename, max.bytes.in.file = file.info(filename)$size,
+  pied.de.pilote = 1.2, verbose = FALSE){
+  #
+  # Suppress warnings when reading strings with internal nul character:
+  #
+  RTC <- function(x, ...) suppressWarnings(rawToChar(x, ...))
+  #
+  # Define some shortcuts:
+  #
+  SInt32 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 4, ...)
+  SInt16 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 2, ...)
+  SInt8 <- function(f, ...) readBin(f, what = "integer", signed = TRUE, endian = "big", size = 1, ...)
+  UInt32 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 4, ...)
+  UInt16 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 2, ...)
+  UInt8 <- function(f, ...) readBin(f, what = "integer", signed = FALSE, endian = "big", size = 1, ...)
+  # NOTE(review): floats are read little-endian while every integer is read
+  # big-endian -- confirm against real files.
+  f32 <- function(f, ...) readBin(f, what = "numeric", size = 4,endian="little", ...)
+  f64 <- function(f, ...) readBin(f, what = "numeric", size = 8, endian="little",...)
+  #
+  # Load raw data in memory (the connection is closed even if reading fails):
+  #
+  fc <- file(filename, open = "rb")
+  rawdata <- tryCatch(
+    readBin(fc, what = "raw", n = pied.de.pilote*max.bytes.in.file,endian="little"),
+    finally = close(fc))
+  if(verbose) print(paste("number of bytes in file", filename, "is", length(rawdata)))
+  #
+  # Make a list to store results:
+  #
+  res <- list(Header = NULL, Directory = NA, Data = NA)
+  #
+  # Header section is 128 bytes long, located at a fixed position at the
+  # beginning of the file. We essentially need the number of item and dataoffset
+  #
+  res$Header$abif <- RTC(rawdata[1:4])
+  if(res$Header$abif != "ABIF") stop("file not in ABIF format")
+  if(verbose) print("OK: File is in ABIF format")
+
+  res$Header$version <- SInt16(rawdata[5:6])
+  if(verbose) print(paste("File in ABIF version", res$Header$version/100))
+
+  res$Header$DirEntry.name <- rawdata[7:10]
+  if(verbose) print(paste("DirEntry name: ", RTC(res$Header$DirEntry.name)))
+
+  res$Header$DirEntry.number <- SInt32(rawdata[11:14])
+  if(verbose) print(paste("DirEntry number: ", res$Header$DirEntry.number))
+
+  res$Header$DirEntry.elementtype <- SInt16(rawdata[15:16])
+  if(verbose) print(paste("DirEntry elementtype: ", res$Header$DirEntry.elementtype))
+
+  res$Header$DirEntry.elementsize <- SInt16(rawdata[17:18])
+  if(verbose) print(paste("DirEntry elementsize: ", res$Header$DirEntry.elementsize))
+
+  # This one is important:
+  res$Header$numelements <- SInt32(rawdata[19:22])
+  if(verbose) print(paste("DirEntry numelements: ", res$Header$numelements))
+
+  # This one is important too:
+  res$Header$dataoffset <- SInt32(rawdata[27:30])
+  if(verbose) print(paste("DirEntry dataoffset: ", res$Header$dataoffset))
+  dataoffset <- res$Header$dataoffset + 1 # start position is 1 in R vectors
+
+  res$Header$datahandle <- SInt32(rawdata[31:34])
+  if(verbose) print(paste("DirEntry datahandle: ", res$Header$datahandle))
+
+  res$Header$unused <- SInt16(rawdata[35:128], n = 47)
+  # Should be ignored and set to zero
+  res$Header$unused[1:length(res$Header$unused)] <- 0
+  if(verbose) print(paste("DirEntry unused: ", length(res$Header$unused), "2-byte integers"))
+
+  #
+  # The directory is located at the offset specified in the header,
+  # and consist of an array of directory entries.
+  # We scan the directory to put values in a data.frame:
+  #
+  dirdf <- data.frame(list(name = character(0)))
+  dirdf$name <- as.character(dirdf$name) # force to characters
+
+  for(i in seq_len(res$Header$numelements)){
+    deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
+    # Exactly elementsize bytes, starting at deb:
+    direntry <- rawdata[deb + seq_len(res$Header$DirEntry.elementsize) - 1]
+    dirdf[i, "name"] <- RTC(direntry[1:4])
+    dirdf[i, "tagnumber"] <- SInt32(direntry[5:8])
+    dirdf[i, "elementtype"] <- SInt16(direntry[9:10])
+    dirdf[i, "elementsize"] <- SInt16(direntry[11:12])
+    dirdf[i, "numelements"] <- SInt32(direntry[13:16])
+    dirdf[i, "datasize"] <- SInt32(direntry[17:20])
+    dirdf[i, "dataoffset"] <- SInt32(direntry[21:24])
+  }
+  if(verbose){
+    print("Element found:")
+    print(dirdf$name)
+  }
+  #
+  # Save Directory and make a list to store data:
+  #
+  res$Directory <- dirdf
+  res$Data <- vector("list", nrow(dirdf))
+  names(res$Data) <- paste(dirdf$name, dirdf$tagnumber, sep = ".")
+  #
+  # Data extraction:
+  #
+  for(i in seq_len(res$Header$numelements)){
+    deb <- (i-1)*res$Header$DirEntry.elementsize + dataoffset
+    # Short data are stored in dataoffset directly:
+    if(dirdf[i, "datasize"] > 4){
+      debinraw <- dirdf[i, "dataoffset"] + 1
+    } else {
+      debinraw <- deb + 20
+    }
+    elementtype <- dirdf[i, "elementtype"]
+    numelements <- dirdf[i, "numelements"]
+    elementsize <- dirdf[i, "elementsize"]
+    # Exactly numelements*elementsize bytes. The former slice
+    # debinraw:(debinraw + numelements*elementsize) took one byte too many,
+    # which leaked the first byte of the next item into character data.
+    data <- rawdata[debinraw + seq_len(numelements*elementsize) - 1]
+    # unsigned 8 bits integer:
+    if(elementtype == 1) res$Data[[i]] <- UInt8(data, n = numelements)
+    # char or signed 8 bits integer
+    if(elementtype == 2){
+      # rawToChar() fails on embedded nul bytes. The handler must RETURN the
+      # fallback string: returning NULL would make the assignment delete
+      # slot i and shift every later element under the wrong name.
+      res$Data[[i]] <- tryCatch(RTC(data), error = function(er){
+        cat(paste("an error was detected with the following message:", conditionMessage(er), " but this error was fixed\n", sep = " "))
+        paste(rawToChar(data, multiple = TRUE), collapse = "")
+      })
+    }
+
+    # unsigned 16 bits integer:
+    if(elementtype == 3) res$Data[[i]] <- UInt16(data, n = numelements)
+    # short:
+    if(elementtype == 4) res$Data[[i]] <- SInt16(data, n = numelements)
+    # long:
+    if(elementtype == 5) res$Data[[i]] <- SInt32(data, n = numelements)
+    # float:
+    if(elementtype == 7) res$Data[[i]] <- f32(data, n = numelements)
+    # double:
+    if(elementtype == 8) res$Data[[i]] <- f64(data, n = numelements)
+    # date:
+    if(elementtype == 10)
+      res$Data[[i]] <- list(year = SInt16(data, n = 1),
+        month = UInt8(data[-(1:2)], n = 1), day = UInt8(data[-(1:3)], n = 1))
+    # time:
+    if(elementtype == 11)
+      res$Data[[i]] <- list(hour = UInt8(data, n = 1),
+        minute = UInt8(data[-1], n = 1), second = UInt8(data[-(1:2)], n = 1),
+        hsecond = UInt8(data[-(1:3)], n = 1))
+    # pString: one length byte followed by that many characters. The length
+    # is read unsigned, and seq_len() copes with a zero length.
+    if(elementtype == 18){
+      n <- UInt8(rawdata[debinraw])
+      pString <- RTC(rawdata[debinraw + seq_len(n)])
+      res$Data[[i]] <- pString
+    }
+    # cString: rawToChar() drops the trailing nul terminator by itself.
+    if(elementtype == 19) res$Data[[i]] <- RTC(data)
+    # user:
+    if(elementtype >= 1024) res$Data[[i]] <- data
+    # legacy:
+    if(elementtype %in% c(12, 13))
+      warning("unimplemented legacy type found in file")
+    if(elementtype %in% c(6, 9, 14, 15, 16, 17, 20, 128, 256, 384))
+      warning("unsupported legacy type found in file")
+  }
+  return(res)
+}
diff --git a/R/read.alignment.R b/R/read.alignment.R
new file mode 100644
index 0000000..b7c6b67
--- /dev/null
+++ b/R/read.alignment.R
@@ -0,0 +1,35 @@
+#
+# Read files of aligned sequences in various formats
+#
+# Read a file of aligned sequences.
+#
+# Arguments:
+#   file          path of the file (expanded with path.expand()).
+#   format        one of fasta, mase, phylip, msf or clustal, in any case.
+#   forceToLower  when TRUE the sequences are converted to lower case.
+#
+# Value: an object of class "alignment", a list with components nb, nam, seq
+# and com; com is NA except for the mase format.
+# It is an error if the file is not readable or the format name is unknown.
+read.alignment <- function(file, format, forceToLower = TRUE)
+{
+  #
+  # Check that we have read permission on the file:
+  #
+  file <- path.expand(file)
+  if(file.access(file, mode = 4) != 0) stop(paste("File", file, "is not readable"))
+
+  #
+  # Format names are matched without regard to case, so that the reader and
+  # the handling of mase comments below always agree ("MASE" used to be read
+  # as mase but then lost its comments).
+  #
+  fmt <- tolower(format)
+  reader <- switch( fmt,
+    fasta = "read_fasta_align",
+    mase = "read_mase",
+    phylip = "read_phylip_align",
+    msf = "read_msf_align",
+    clustal = "read_clustal_align",
+    stop("Wrong format name: Format available are fasta,mase,phylip,msf,clustal")
+  )
+  ali <- .Call(reader, file, PACKAGE = "seqinr")
+
+  ali <- lapply(ali, as.character)
+  if(forceToLower) ali[[3]] <- lapply(ali[[3]], tolower)
+  # Only the mase reader returns a fourth element with the comments.
+  if(fmt == "mase"){
+    ali <- list(nb = as.numeric(ali[[1]]), nam = ali[[2]], seq = ali[[3]], com = ali[[4]])
+  } else {
+    ali <- list(nb = as.numeric(ali[[1]]), nam = ali[[2]], seq = ali[[3]], com = NA)
+  }
+  class(ali) <- "alignment"
+  return(ali)
+}
diff --git a/R/read.fasta.R b/R/read.fasta.R
new file mode 100755
index 0000000..be261b2
--- /dev/null
+++ b/R/read.fasta.R
@@ -0,0 +1,150 @@
+# Read sequences from a FASTA text file or from a MAQ binary FASTA file.
+#
+# Arguments:
+#   file             path of the file.
+#   seqtype          "DNA" (default) or "AA".
+#   as.string        when FALSE each sequence is split with s2c().
+#   forceDNAtolower  text files only: convert DNA sequences to lower case.
+#   set.attributes   when TRUE set the attributes name, Annot and class on
+#                    each sequence.
+#   legacy.mode      text files only: drop lines starting with ';'.
+#   seqonly          when TRUE return the sequences as soon as they are read,
+#                    without names nor attributes.
+#   strip.desc       when TRUE the Annot attribute has no leading '>'.
+#   bfa              when TRUE read a MAQ binary FASTA file.
+#   sizeof.longlong, endian, apply.mask  binary files only.
+#
+# Value: a list of sequences named after the first word of their header line
+# (text files) or after the name stored in the record (binary files).
+read.fasta <- function(file = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+  seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
+  set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, strip.desc = FALSE,
+  bfa = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
+  endian = .Platform$endian, apply.mask = TRUE)
+{
+
+  seqtype <- match.arg(seqtype) # default is DNA
+
+  ##############################
+  #
+  # Regular flat FASTA text file
+  #
+  ##############################
+  if(!bfa){ # Regular text file
+    #
+    # Read the fasta file as a vector of strings:
+    #
+    lines <- readLines(file)
+    #
+    # Remove comment lines starting with a semicolon ';'
+    #
+    if(legacy.mode){
+      comments <- grep("^;", lines)
+      if(length(comments) > 0) lines <- lines[-comments]
+    }
+    #
+    # Get the line numbers where sequences names are:
+    #
+    ind <- which(substr(lines, 1L, 1L) == ">")
+    #
+    # Compute the total number of sequences:
+    #
+    nseq <- length(ind)
+    if(nseq == 0){
+      stop("no line starting with a > character found")
+    }
+    #
+    # Localize sequence data:
+    #
+    start <- ind + 1
+    end <- ind - 1
+    end <- c(end[-1], length(lines))
+    #
+    # Read sequences:
+    #
+    sequences <- lapply(seq_len(nseq), function(i){
+      # A header directly followed by another header, or by the end of the
+      # file, is an empty sequence: start[i]:end[i] would run backwards
+      # there and pick up header lines.
+      if(start[i] > end[i]) return("")
+      paste(lines[start[i]:end[i]], collapse = "")
+    })
+    if(seqonly) return(sequences)
+    #
+    # Read sequence names:
+    #
+    nomseq <- lapply(seq_len(nseq), function(i){
+      firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
+      substr(firstword, 2, nchar(firstword))
+    })
+    #
+    # Turn DNA sequences in lower case letters if required:
+    #
+    if(seqtype == "DNA"){
+      if(forceDNAtolower){
+        sequences <- as.list(tolower(sequences))
+      }
+    }
+    #
+    # Turn it into a vector of single chars if required:
+    #
+    if(as.string == FALSE) sequences <- lapply(sequences, s2c)
+    #
+    # Set sequence attributes when required:
+    #
+    if(set.attributes){
+      for(i in seq_len(nseq)){
+        Annot <- lines[ind[i]]
+        if(strip.desc) Annot <- substr(Annot, 2L, nchar(Annot))
+        attributes(sequences[[i]]) <- list(name = nomseq[[i]],
+          Annot = Annot,
+          class = switch(seqtype, "AA" = "SeqFastaAA", "DNA" = "SeqFastadna"))
+      }
+    }
+    #
+    # Give the sequences names to the list elements:
+    #
+    names(sequences) <- nomseq
+    return(sequences)
+  }
+  ##############################
+  #
+  # MAQ binary FASTA file
+  #
+  ##############################
+  if(bfa){
+    if(seqtype != "DNA") stop("binary fasta file available for DNA sequences only")
+    # Open file in binary mode; it is closed on exit, errors included:
+    mycon <- file(file, open = "rb")
+    on.exit(close(mycon))
+    r2s <- words(4) # byte to tetranucleotide
+
+    # Read one record; returns NULL at end of file, otherwise a list with
+    # the sequence (a string) and its name.
+    readOneBFARecord <- function(con, sizeof.longlong, endian, apply.mask){
+      # The first integer is only used to detect the end of the file; 'len'
+      # is read again after the name and the original length.
+      len <- readBin(con, n = 1, what = "int", endian = endian)
+      if(length(len) == 0) return(NULL) # end of file
+      name <- readBin(con, n = 1, what = "character", endian = endian)
+      ori_len <- readBin(con, n = 1, what = "int", endian = endian)
+      len <- readBin(con, n = 1, what = "int", endian = endian)
+      seq <- readBin(con, n = len*sizeof.longlong, what = "raw", size = 1, endian = endian)
+      mask <- readBin(con, n = len*sizeof.longlong, what = "raw", size = 1, endian = endian)
+      if(endian == "little"){
+        # Reverse the bytes inside each block of sizeof.longlong bytes:
+        neword <- sizeof.longlong:1 +
+          rep(seq(0, (len - 1)*sizeof.longlong, by = sizeof.longlong),
+            each = sizeof.longlong)
+        # something like 8 7 6 5 4 3 2 1 16 15 14 13 12 11 10 9 ...
+        seq <- seq[neword]
+        mask <- mask[neword]
+      }
+      # Each byte stands for four nucleotides; keep the first ori_len ones.
+      seq4 <- c2s(r2s[as.integer(seq) + 1])
+      seq4 <- substr(seq4, 1, ori_len)
+      if(apply.mask){
+        # Positions where the decoded mask reads "a" are replaced by "n".
+        mask4 <- c2s(r2s[as.integer(mask) + 1])
+        mask4 <- substr(mask4, 1, ori_len)
+        npos <- gregexpr("a", mask4, fixed = TRUE)[[1]]
+        for(i in npos) substr(seq4, i, i + 1) <- "n"
+      }
+      return(list(seq = seq4, name = name))
+    } # end readOneBFARecord
+
+    sequences <- vector(mode = "list")
+    nomseq <- vector(mode = "list")
+    i <- 1
+    repeat{
+      res <- readOneBFARecord(mycon, sizeof.longlong, endian, apply.mask)
+      if(is.null(res)) break
+      sequences[[i]] <- res$seq
+      nomseq[[i]] <- res$name
+      i <- i + 1
+    }
+    nseq <- length(sequences)
+    if(seqonly) return(sequences)
+    if(as.string == FALSE) sequences <- lapply(sequences, s2c)
+    if(set.attributes){
+      for(i in seq_len(nseq)){
+        # Annot was left undefined when strip.desc was TRUE; as for text
+        # files, it is then the description without the leading '>'.
+        if(strip.desc){
+          Annot <- nomseq[[i]]
+        } else {
+          Annot <- c2s(c(">", nomseq[[i]]))
+        }
+        attributes(sequences[[i]]) <- list(name = nomseq[[i]],
+          Annot = Annot, class = "SeqFastadna")
+      }
+    }
+    names(sequences) <- nomseq
+    return(sequences)
+  }
+}
+
+
diff --git a/R/readBins.R b/R/readBins.R
new file mode 100644
index 0000000..1eaf5aa
--- /dev/null
+++ b/R/readBins.R
@@ -0,0 +1,36 @@
+readBins <- function(file,
+  colnames = c("allele.name", "size.bp", "minus.bp", "plus.bp")){
+  # Import a bins file: a 3-line header followed by "Panel Name" sections,
+  # each made of "Marker Name" sub-sections holding one table of bins.
+  # Returns list(infos = <header table>, <panel name> = list(<marker> = <table>)).
+  lines <- readLines(file)
+  secondField <- function(x) unlist(strsplit(x, split = "\t"))[2]
+  headcon <- textConnection(lines[1:3])
+  out <- list(infos = read.table(headcon, sep = "\t", fill = TRUE, header = FALSE))
+  close(headcon)
+
+  panelAt <- which(substr(lines, start = 1, stop = 11) == "Panel Name\t")
+  panelEnd <- c(panelAt[-1] - 1, length(lines))
+  for(p in seq_along(panelAt)){
+    # Lines of this panel, without its "Panel Name" header line:
+    block <- lines[(panelAt[p] + 1):panelEnd[p]]
+    markerAt <- which(substr(block, start = 1, stop = 12) == "Marker Name\t")
+    markerEnd <- c(markerAt[-1] - 1, length(block))
+    markers <- vector(mode = "list", length = length(markerAt))
+    for(m in seq_along(markerAt)){
+      tabcon <- textConnection(block[(markerAt[m] + 1):markerEnd[m]])
+      tab <- read.table(tabcon, sep = "", fill = TRUE)
+      close(tabcon)
+      colnames(tab)[1:length(colnames)] <- colnames
+      markers[[m]] <- tab
+      names(markers)[m] <- secondField(block[markerAt[m]])
+    }
+    # Every marker table is expected to have exactly 4 columns:
+    if(any(sapply(markers, ncol) != 4)){
+      warning("A problem may have occur during importation")
+    }
+    out[[p + 1]] <- markers
+    names(out)[p + 1] <- secondField(lines[panelAt[p]])
+  }
+  return(out)
+}
diff --git a/R/readPanels.R b/R/readPanels.R
new file mode 100644
index 0000000..4acbf03
--- /dev/null
+++ b/R/readPanels.R
@@ -0,0 +1,42 @@
+readPanels <- function(file,
+  colnames = c("marker", "dye.col", "min.bp", "max.bp", "exp.pcg", "repeat.bp",
+  "stutter.pc", "uknw", "allele names")){
+  #
+  # Import a panels file. Lines before the first line starting with "Panel\t"
+  # are returned verbatim as `infos`; each panel section is then read as a
+  # tab-separated table of 9 columns and stored under the panel's name.
+  # Stops when no panel is found or when a panel does not have 9 columns.
+  #
+  src <- readLines(file)
+  iPanel <- which(substr(src, start = 1, stop = 6) == "Panel\t")
+  if(length(iPanel) == 0) stop("no panel found in file")
+  # seq_len() yields an empty header when the file starts with a panel
+  # (1:0 would wrongly select the first line):
+  result <- list(infos = src[seq_len(iPanel[1] - 1)])
+
+  starts <- iPanel + 1
+  stops <- c(iPanel[-1] - 1, length(src))
+
+  for(i in seq_along(iPanel)){
+    # Change all runs of tabulations by a single one:
+    toimport <- gsub("\t{2,}", "\t", src[starts[i]:stops[i]])
+    mycon <- textConnection(toimport)
+    panel <- read.table(mycon, sep = "\t", quote = "")
+    close(mycon)
+    # Remove columns that are entirely NA. drop = FALSE keeps a data.frame
+    # even when a single column is left, so that the column check below
+    # fails with its own message instead of an obscure one.
+    tokeep <- !sapply(panel, function(x) all(is.na(x)))
+    panel <- panel[, tokeep, drop = FALSE]
+    # There must be 9 columns
+    if(ncol(panel) != 9) stop("wrong column number")
+    colnames(panel) <- colnames
+    result[[i+1]] <- panel
+
+    # Panel name is the second non-empty field of the "Panel" header line:
+    headeritems <- unlist(strsplit(src[iPanel[i]], split = "\t"))
+    headeritems <- headeritems[nchar(headeritems) > 0]
+    names(result)[i+1] <- headeritems[2]
+  }
+  return(result)
+}
diff --git a/R/readfirstrec.R b/R/readfirstrec.R
new file mode 100644
index 0000000..2aef037
--- /dev/null
+++ b/R/readfirstrec.R
@@ -0,0 +1,46 @@
+###################################################################################################
+# #
+# readfirstrec #
+# #
+# Returns the record count of the specified ACNUC index file. #
+# #
+# ==> readfirstrec&type=[AUT|BIB|ACC|SMJ|SUB|LOC|KEY|SPEC|SHRT|LNG|EXT|TXT] #
+# <== code=xx&count=xx #
+# Returns the record count of the specified ACNUC index file. #
+# Code != 0 indicates error. #
+# #
+###################################################################################################
+
+readfirstrec <- function(socket = autosocket(), type)
+{
+  # Ask the ACNUC server for the record count of the index file `type`.
+  # Called without `type`, returns the vector of allowed type names without
+  # contacting the server. Returns NA with a warning on failure.
+  allowedtype <- c("AUT", "BIB", "ACC", "SMJ", "SUB", "LOC", "KEY", "SPEC",
+                   "SHRT", "LNG", "EXT", "TXT")
+  if(missing(type)){
+    return(allowedtype)
+  }
+  # Build and send the request:
+  request <- paste("readfirstrec&type=", type, sep = "", collapse = "")
+  writeLines(request, socket, sep = "\n")
+  #
+  # Read answer from server and check that there is one, as residuecount()
+  # does, instead of parsing an empty answer:
+  #
+  s <- readLines(socket, n = 1)
+  if(length(s) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # Check the status code returned by the server (first item, "0" means OK):
+  #
+  rep <- parser.socket(s)
+  if(rep[1] != "0"){
+    warning("Server returns an error")
+    return(NA)
+  }
+  return(as.numeric(rep[2]))
+}
+
diff --git a/R/readsmj.R b/R/readsmj.R
new file mode 100644
index 0000000..848f169
--- /dev/null
+++ b/R/readsmj.R
@@ -0,0 +1,91 @@
+###################################################################################################
+# #
+# readsmj #
+# #
+# #
+# ==> readsmj&num=xx&nl=xx #
+# <== code=xx&nl=xx #
+# recnum=xx&name="xx"&plong=xx{&libel="xx"} #
+# ... a series of nl lines like that ... #
+# Returns data from nl consecutive records starting from rank num of file SMJYT including label #
+# if not empty. #
+# Code != 0 indicates error. #
+# #
+###################################################################################################
+
+readsmj <- function(socket = autosocket(), num = 2, nl = 10, recnum.add = FALSE, nature.add = TRUE,
+plong.add = FALSE, libel.add = FALSE, sname.add = FALSE, all.add = FALSE)
+{
+  #
+  # Read nl consecutive records of the SMJYT file, starting at rank num, and
+  # return them as a data.frame with one row per record. The *.add flags
+  # select the optional columns; all.add = TRUE selects them all.
+  #
+  if(all.add){
+    recnum.add <- nature.add <- plong.add <- libel.add <- sname.add <- TRUE
+  }
+  # Strip the first and last characters (the surrounding quotes) of a field:
+  unquote <- function(x) substr(x, 2, nchar(x) - 1)
+
+  #
+  # Send the request and read the one-line status answer:
+  #
+  writeLines(paste("readsmj&num=", num, "&nl=", nl, sep = "", collapse = ""),
+             socket, sep = "\n")
+  status <- parser.socket(readLines(socket, n = 1))
+  if(status[1] != "0") stop("Error from server")
+
+  #
+  # The second status item is the number of record lines that follow:
+  #
+  n <- as.numeric(status[2])
+  records <- readLines(socket, n = n)
+
+  #
+  # Allocate result columns, name is the only mandatory one:
+  #
+  name <- character(n)
+  if(recnum.add) recnum <- numeric(n)
+  if(nature.add) nature <- factor(character(n), levels = c("00","01","02","03","04","05","06","07"))
+  if(plong.add) plong <- numeric(n)
+  if(libel.add) libel <- character(n)
+  if(sname.add) sname <- character(n)
+
+  #
+  # Parse the records one by one:
+  #
+  for(k in seq_len(n)){
+    fields <- parser.socket(records[[k]])
+    name[k] <- unquote(fields[2])
+    # The all-x name is reported as NA and the optional columns keep
+    # their initial value for this record:
+    if(name[k] == "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"){
+      name[k] <- NA
+      next
+    }
+    # The first two characters of the name give its nature, the
+    # remaining ones are the short name:
+    if(sname.add) sname[k] <- substr(name[k], 3, nchar(name[k]))
+    if(recnum.add) recnum[k] <- fields[1]
+    if(nature.add) nature[k] <- substr(name[k], 1, 2)
+    if(plong.add) plong[k] <- fields[3]
+    # The label is optional in the server answer (fourth item):
+    if(libel.add){
+      libel[k] <- if(length(fields) == 4) unquote(fields[4]) else NA
+    }
+  }
+
+  # Assemble the data.frame, optional columns in a fixed order:
+  df <- data.frame(list(name = I(name)))
+  if(sname.add) df$sname <- sname
+  if(recnum.add) df$recnum <- recnum
+  if(nature.add){
+    # Replace the two-digit codes by readable level names:
+    levels(nature) <- c("status", "molecule", "journal", "year", "type", "organelle", "division", "dbstrucinfo")
+    df$nature <- nature
+  }
+  if(plong.add) df$plong <- plong
+  if(libel.add) df$libel <- libel
+
+  return(df)
+}
diff --git a/R/rearranged.oriloc.R b/R/rearranged.oriloc.R
new file mode 100644
index 0000000..4300c3d
--- /dev/null
+++ b/R/rearranged.oriloc.R
@@ -0,0 +1,65 @@
+#############################################################################
+# Rearranged oriloc #
+# #
+# Detection of replication-associated effects on base composition asymmetry #
+# #
+#############################################################################
+
+rearranged.oriloc <- function(
+  seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr") ,
+  g2.coord = system.file("sequences/ct.predict", package = "seqinr") )
+  {
+  #
+  # For each predicted CDS, compute the GC and AT skews on third codon
+  # positions, then list the CDS with all forward-strand ones first and
+  # reverse-strand ones last.
+  #
+  # seq.fasta: fasta file, only its first sequence is used.
+  # g2.coord: text file with one CDS per line, blank-separated fields, the
+  #           second and third ones being the start and end coordinates.
+  #
+  genome <- read.fasta(seq.fasta)[[1]]
+  fields <- lapply(readLines(g2.coord), function(x){
+    items <- unlist(strsplit(x, split=" "))
+    items[which(items!="")]
+  })
+  start <- as.numeric(unlist(lapply(fields, function(x) x[2])))
+  end <- as.numeric(unlist(lapply(fields, function(x) x[3])))
+  ncds <- length(start)
+
+  #
+  # A CDS whose end comes before its start is on the reverse strand:
+  #
+  strand <- rep("forward", ncds)
+  strand[which(end<start)] <- "reverse"
+  meancoord <- (start+end)/2
+
+  #
+  # Skew between two counts, set to zero when undefined (both counts null):
+  #
+  skew <- function(n1, n2){
+    res <- (n1-n2)/(n1+n2)
+    if(is.na(res)) 0 else res
+  }
+
+  gcskew <- numeric(ncds)
+  atskew <- numeric(ncds)
+  for(i in seq_len(ncds)){
+    cds <- genome[start[i]:end[i]]
+    third <- cds[seq(from=3,to=length(cds),by=3)] # third codon positions
+    gcskew[i] <- skew(sum(third%in%c("g","G")), sum(third%in%c("c","C")))
+    atskew[i] <- skew(sum(third%in%c("a","A")), sum(third%in%c("t","T")))
+  }
+
+  #
+  # Forward CDS first, then reverse ones, each group in file order:
+  #
+  neworder <- c(which(strand=="forward"),which(strand=="reverse"))
+  data <- data.frame(meancoord.rearr = seq_len(ncds),
+    gcskew.rearr = gcskew[neworder],
+    atskew.rearr = atskew[neworder],
+    strand.rearr = strand[neworder],
+    order = neworder,
+    meancoord.real = meancoord[neworder])
+  return(data)
+  }
diff --git a/R/recstat.R b/R/recstat.R
new file mode 100755
index 0000000..a7d11da
--- /dev/null
+++ b/R/recstat.R
@@ -0,0 +1,65 @@
+##
+# This function counts the number of occurrences of each triplet in a sliding
+# window, creates a contingency table with the triplet composition and then
+# computes a correspondence analysis on this table.
+##
+#v.18.08.2011
+recstat <- function(seq, sizewin = 90, shift = 30, seqname = "no name")
+{
+    #
+    # Argument checks: when one fails a message is printed and NULL returned.
+    if (!is.character(seq))
+    { # class character if only one sequence, class list if more
+        print("This file has more than one sequence or class is not character.")
+        return()
+    }
+    if ((shift%%3) != 0)
+    { # windows must all be in the same reading frame
+        print("The windows are not in the same reading frame, please change shift value to a multiple of 3.")
+        return()
+    }
+    if (sizewin%%3 != 0)
+    {
+        print("The length of the window is not a multiple of 3, please change sizewin value.")
+        return()
+    }
+    seqsize <- length(seq) # number of 1-mers in the sequence
+    #
+    # Window starts for a reading frame: every shift from the frame offset,
+    # kept up to the first one for which start + sizewin exceeds seqsize.
+    #
+    winstarts <- function(frame)
+    {
+        from <- seq(from = frame, to = seqsize, by = shift)
+        from[1:(which((from + sizewin) > seqsize)[1])]
+    }
+    starts <- lapply(c(1, 2, 3), winstarts)
+    vdep <- unlist(starts) # window starts, the same ones are used on both strands
+    vind <- rep(c(1, 2, 3), times = sapply(starts, length)) # reading frame of each window
+    #
+    # Helpers applied to each strand: positions of the three stop codons in a
+    # sequence given as a string, and triplet counts of every window as a
+    # matrix with one row per window and 64 columns.
+    #
+    stops <- function(s) c(words.pos("taa", s), words.pos("tag", s), words.pos("tga", s))
+    triplets <- function(strand)
+    {
+        counts <- lapply(vdep, function(from) count(strand[from:(from + sizewin - 1)], wordsize = 3, start = 0, by = 3))
+        matrix(unlist(counts), byrow = TRUE, ncol = 64)
+    }
+    ##
+    ##direct strand, then reverse complement##
+    ##
+    cseq <- c2s(seq)
+    seq_reverse <- rev(comp(seq, ambiguous = TRUE))
+    cseq_reverse <- c2s(seq_reverse)
+    resd <- triplets(seq)
+    resr <- triplets(seq_reverse)
+    ##
+    ##CA on each contingency table##
+    ##
+    rec <- list(seq, sizewin, shift, seqsize, seqname, vdep, vind,
+        stops(cseq), stops(cseq_reverse), c(words.pos("atg", cseq)), c(words.pos("atg", cseq_reverse)),
+        resd, resr, dudi.coa(resd, scannf = FALSE, nf = 4), dudi.coa(resr, scannf = FALSE, nf = 4))
+    return(rec)
+}
diff --git a/R/residuecount.R b/R/residuecount.R
new file mode 100644
index 0000000..4532d58
--- /dev/null
+++ b/R/residuecount.R
@@ -0,0 +1,33 @@
+# ==> residuecount&lrank=xx
+# <== code=xx&count=xx
+# Computes the total number of residues (nucleotides or aminoacids) in
+# all sequences of the list of specified rank.
+# Code != 0 indicates error.
+
+
+residuecount <- function(lrank, socket = autosocket()){
+  #
+  # Ask the server for the total number of residues in the sequences of the
+  # list of rank lrank. Returns that count, or NA with a warning when the
+  # server does not answer or returns a non-zero status code.
+  #
+  writeLines(paste("residuecount&lrank=", lrank, sep = ""), socket, sep = "\n")
+  reply <- readLines(socket, n = 1)
+  #
+  # No answer at all:
+  #
+  if(length(reply) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # First item is the status code, second one the count:
+  #
+  items <- parser.socket(reply)
+  if(items[1] == "0"){
+    return(as.numeric(items[2]))
+  }
+  warning(paste("error code returned by server :", items[1]))
+  return(NA)
+}
+
diff --git a/R/reverse.align.R b/R/reverse.align.R
new file mode 100644
index 0000000..4f45a14
--- /dev/null
+++ b/R/reverse.align.R
@@ -0,0 +1,86 @@
+reverse.align <- function(nucl.file,
+  protaln.file,
+  input.format = "fasta",
+  out.file,
+  output.format = "fasta",
+  align.prot = FALSE,
+  numcode = 1,
+  clustal.path = NULL,
+  forceDNAtolower = TRUE,
+  forceAAtolower = FALSE){
+  #
+  # Build the alignment of coding sequences that corresponds to an alignment
+  # of their protein translations, keeping only the codons of the columns
+  # of the protein alignment that have no gap. The result is written in
+  # fasta format into out.file and NULL is returned.
+  # NOTE(review): output.format is not used in this function.
+  #
+  seq.nucl <- read.fasta(nucl.file, forceDNAtolower = forceDNAtolower)
+  nseqs <- length(seq.nucl) # number of sequences read, before reordering
+
+  if(isTRUE(align.prot)){
+    #
+    # No protein alignment provided: translate the nucleic sequences and
+    # align the proteins with the program given by clustal.path.
+    #
+    tmp <- tempfile(pattern = "clustal")
+    protseq.file <- tempfile(pattern = "protein")
+    proteins <- lapply(seq.nucl, function(x) translate(x, numcode = numcode))
+    write.fasta(sequences = proteins, names = names(seq.nucl), file.out = protseq.file)
+    system(paste(clustal.path, " -outfile=", tmp ," -infile=", protseq.file, sep = ""))
+    protaln <- read.alignment(tmp, format = "clustal", forceToLower = forceAAtolower)
+    input.format <- "clustal"
+  } else {
+    #
+    # The protein alignment file is provided:
+    #
+    protaln <- read.alignment(protaln.file, format = input.format,
+      forceToLower = forceAAtolower)
+  }
+  #
+  # Put the nucleic sequences in the order of the protein alignment,
+  # matching them by name:
+  #
+  ordername <- unlist(lapply(protaln$nam, function(x) which(names(seq.nucl) == x)))
+  seq.nucl <- seq.nucl[ordername]
+  #
+  # The gap character depends on the format of the alignment file:
+  #
+  gapchar <- if(input.format %in% c("fasta", "clustal", "phylip", "mase")) "-" else NULL
+  if(input.format == "msf") gapchar <- "."
+  if(is.null(gapchar)) stop("no known gap character for this alignment format")
+  #
+  # cds.aln is the result to write, one growing vector of bases per sequence:
+  #
+  cds.aln <- vector(mode = "list", length = nseqs)
+  names(cds.aln) <- protaln$nam
+  #
+  # ncodon[j] is the number of residues seen so far in protein number j,
+  # that is the rank of the current codon in nucleic sequence number j:
+  #
+  ncodon <- rep(0, nseqs)
+  #
+  # Scan the protein alignment column by column:
+  #
+  for(k in seq_len(nchar(protaln$seq[1]))){
+    gapfound <- FALSE
+    for(j in seq_len(nseqs)){
+      if(substr(protaln$seq[j], k, k) == gapchar){
+        gapfound <- TRUE
+      } else {
+        ncodon[j] <- ncodon[j] + 1
+      }
+    }
+    #
+    # Without gap in the column, append the current codon of each sequence:
+    #
+    if(!gapfound){
+      for(j in seq_len(nseqs)){
+        last <- 3*ncodon[j]
+        cds.aln[[j]] <- c(cds.aln[[j]], seq.nucl[[j]][(last - 2):last])
+      }
+    }
+  }
+  write.fasta(sequences = cds.aln, names = names(seq.nucl), file.out = out.file, open = "w")
+  return(NULL)
+}
diff --git a/R/rho.R b/R/rho.R
new file mode 100644
index 0000000..a3c37b0
--- /dev/null
+++ b/R/rho.R
@@ -0,0 +1,16 @@
+rho <- function (sequence, wordsize = 2, alphabet = s2c("acgt"))
+{
+  # Ratio of the observed word counts to the counts expected from the
+  # single-letter frequencies of the sequence.
+  observed <- count(sequence, wordsize, freq = FALSE, alphabet = alphabet)
+  letterfreq <- count(sequence, 1, freq = TRUE, alphabet = alphabet)
+  # Expected frequency of each word of size n: Kronecker product of the
+  # letter frequencies with the expected frequencies of words of size n - 1.
+  wordfreq <- function (n)
+  {
+    if (n == 1) letterfreq else kronecker(letterfreq, wordfreq(n - 1))
+  }
+  expected <- sum(observed)*wordfreq(wordsize)
+
+  return(observed/expected)
+}
diff --git a/R/rot13.R b/R/rot13.R
new file mode 100644
index 0000000..872d818
--- /dev/null
+++ b/R/rot13.R
@@ -0,0 +1,6 @@
+rot13 <- function(string){
+  if(!is.character(string)) stop("character string expected")
+  shift <- c(14:26, 1:13) # each letter is replaced by the one 13 places away
+  from <- c2s(c(letters, LETTERS))
+  chartr(old = from, new = c2s(c(letters[shift], LETTERS[shift])), x = string)
+}
diff --git a/R/s2n.R b/R/s2n.R
new file mode 100644
index 0000000..70fcfe6
--- /dev/null
+++ b/R/s2n.R
@@ -0,0 +1,23 @@
+##############################################################################
+#
+# simple numerical encoding of a DNA sequence that by default
+# is independent of locale.
+#
+##############################################################################
+
+s2n <- function(seq, levels = s2c("acgt"), base4 = TRUE, forceToLower = TRUE)
+{
+  #
+  # Only vectors of single characters are accepted (first element checked):
+  #
+  if(nchar(seq[1]) > 1) stop("sequence is not a vector of chars")
+  # Force to lower-case letters if requested:
+  if(forceToLower) seq <- tolower(seq)
+  #
+  # Rank of each character in levels (NA when absent), numbered from 1, or
+  # from 0 when base4 is TRUE:
+  #
+  codes <- unclass(factor(seq, levels = levels))
+  if(base4) codes <- codes - 1
+  codes
+}
\ No newline at end of file
diff --git a/R/savelist.R b/R/savelist.R
new file mode 100644
index 0000000..2808091
--- /dev/null
+++ b/R/savelist.R
@@ -0,0 +1,71 @@
+# ==> savelist&lrank=xx{&type=[N|A]}
+# <== code=0\n
+# list element names or acc nos on successive lines
+# savelist END.\n
+# To obtain names of all elements of a bit list sent on socket on successive lines;
+# for sequence lists, option &type=A, will give accession numbers instead of seq names;
+# end of series of lines is when savelist END.\n appears
+# lrank : rank of bitlist
+# type: A gives accession numbers, N (default) gives seq names; useful for seq lists only
+
+savelist <- function(lrank, type = c("N", "A"),
+  filename = paste(gln(lrank), ifelse(type == "N", "mne", "acc"), sep = "."),
+  socket = autosocket(), warnme = TRUE){
+  #
+  # Write into filename, one per line, the names (type "N") or the accession
+  # numbers (type "A") of the sequences of the ACNUC list of rank lrank.
+  # Returns NA with a warning when the server answer is empty, reports an
+  # error code or is incomplete.
+  #
+  if(!is.finite(lrank)) stop("wrong lrank argument")
+  if(getliststate(lrank)$type != "SQ") stop("wrong ACNUC list type, should be SQ for sequences")
+  #
+  # Default is "N". The default filename depends on type and is evaluated
+  # only when first used, that is after this line:
+  #
+  type <- type[1]
+  if( !(type %in% c("N", "A"))) stop("wrong type argument")
+  # Build request:
+  request <- paste("savelist&lrank=", lrank, "&type=", type, sep = "")
+  writeLines(request, socket, sep = "\n")
+  answerFromServer <- readLines(socket)
+  # Check that there is an answer from server:
+  if(length(answerFromServer) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  # Check that no error is returned:
+  if(parser.socket(answerFromServer[1])[1] != "0"){
+    warning(paste("error code from server:", answerFromServer[1]))
+    return(NA)
+  }
+  #
+  # Remove first line (code=0):
+  #
+  answerFromServer <- answerFromServer[-1]
+  #
+  # Check completeness of answer. Nothing may be left after the status
+  # line, in which case the end marker is missing too:
+  #
+  nlines <- length(answerFromServer)
+  if(nlines == 0 || answerFromServer[nlines] != "savelist END."){
+    warning("incomplete answer from server")
+    return(NA)
+  }
+  #
+  # Remove last line and dump to file:
+  #
+  answerFromServer <- answerFromServer[-nlines]
+  writeLines(answerFromServer, con = filename)
+  #
+  # Say it's over
+  #
+  if(warnme){
+    if(type == "N"){
+      cat(paste(length(answerFromServer), "sequence mnemonics written into file:", filename), sep = "\n")
+    } else {
+      cat(paste(length(answerFromServer), "sequence accession numbers written into file:", filename), sep = "\n")
+    }
+  }
+}
+
diff --git a/R/setlistname.R b/R/setlistname.R
new file mode 100644
index 0000000..e378cdc
--- /dev/null
+++ b/R/setlistname.R
@@ -0,0 +1,31 @@
+# ==> setlistname&lrank=xx&name="xx"
+# <== code=xx
+# Sets the name of a list identified by its rank.
+# Returned code : 0 if OK,
+# 3 if another list with that name already existed and was deleted
+# 4 no list of rank exists
+
+setlistname <- function(lrank, name = "list1", socket = autosocket()){
+  # Set the name of the list of rank lrank on the server. Returns the status
+  # code sent back by the server as a number, with a warning when it is not
+  # zero, or NA with a warning when the server does not answer.
+  query <- paste("setlistname&lrank=", lrank, "&name=\"", name, "\"", sep = "")
+  writeLines(query, socket, sep = "\n")
+  reply <- readLines(socket, n = 1)
+  #
+  # No answer at all:
+  #
+  if(length(reply) == 0){
+    warning("Empty answer from server")
+    return(NA)
+  }
+  #
+  # The status code is the first item of the answer:
+  #
+  code <- parser.socket(reply)[1]
+  if(code != "0"){
+    warning(paste("code returned by server :", code))
+  }
+  return(as.numeric(code))
+}
+
diff --git a/R/splitseq.R b/R/splitseq.R
new file mode 100644
index 0000000..93e58ed
--- /dev/null
+++ b/R/splitseq.R
@@ -0,0 +1,15 @@
+splitseq <- function(seq, frame = 0, word = 3){
+#
+# Split a sequence, given as a vector of single characters, into successive
+# words of size word, the first one starting at position frame + 1.
+#
+  first <- seq(from = frame + 1, to = length(seq), by = word)
+  chunks <- sapply(first, function(i) c2s(seq[i:(i + word - 1)]))
+#
+# The last word may run past the end of the sequence; it is dropped when
+# it does not have the expected number of characters:
+#
+  nchunks <- length(chunks)
+  if(nchar(chunks[nchunks]) != word) chunks <- chunks[-nchunks]
+  return(chunks)
+}
diff --git a/R/stresc.R b/R/stresc.R
new file mode 100644
index 0000000..9be6de1
--- /dev/null
+++ b/R/stresc.R
@@ -0,0 +1,21 @@
+stresc <- function(strings)
+{
+  #
+  # Escape the characters that are special for LaTeX in each element of
+  # strings. Special characters and their replacements are hard-coded here,
+  # in matching order:
+  #
+  fromchar <- s2c("\\{}$^_%#&~[]|")
+  tochar <- c("$\\backslash$", "\\{", "\\}", "\\$", "\\^{}", "\\_", "\\%", "\\#", "\\&", "\\~{}",
+    "\\lbrack{}", "\\rbrack{}","\\textbar{}")
+  #
+  # Escape one string: split it into characters, replace those found in
+  # fromchar by their counterpart in tochar, and c2s() the result back:
+  #
+  escape <- function(string){
+    chars <- s2c(string)
+    rank <- match(chars, fromchar)
+    c2s(ifelse(is.na(rank), chars, tochar[rank]))
+  }
+  sapply(strings, escape, USE.NAMES = FALSE)
+}
diff --git a/R/stutterabif.R b/R/stutterabif.R
new file mode 100644
index 0000000..6f9b0b9
--- /dev/null
+++ b/R/stutterabif.R
@@ -0,0 +1,109 @@
+stutterabif <- function(abifdata, chanel, poswild,
+ datapointbefore = 70, datapointafter = 20,
+ datapointsigma = 3.5,
+ chanel.names = c(1:4,105),
+ DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ maxrfu = 1000,
+ method = "monoH.FC",
+ pms = 6,
+ fig = FALSE){
+ # Ratio of a stutter product over its corresponding allele, in height (rh) and in surface (rs).
+ xseq <- floor((poswild-datapointbefore):(poswild+datapointafter)) # datapoints around poswild
+ yseq <- abifdata$Data[[DATA]][xseq] # signal at these datapoints
+ #
+ # The baseline for the signal is taken as the most common value:
+ #
+ baseline <- baselineabif(yseq, maxrfu = maxrfu)
+ #
+ # Values below baseline are forced to baseline:
+ #
+ yseq[yseq < baseline] <- baseline
+ #
+ # Set baseline to zero:
+ #
+ yseq <- yseq - baseline
+ #
+ # First pass to fit a gaussian on wild allele:
+ # parameters are p[1] scale factor, p[2] mean and p[3] standard deviation.
+ ytheo.one <- function(p, x) p[1]*dnorm(x, p[2], p[3])
+ obj.one <- function(p) sum(( yseq - ytheo.one(p, xseq))^2) # sum of squared residuals
+ guess.one <- numeric(3)
+ guess.one[1] <- datapointsigma*sqrt(2*pi)*max(yseq) # scale giving a peak of height max(yseq)
+ guess.one[2] <- poswild
+ guess.one[3] <- datapointsigma
+ suppressWarnings(nlmres.one <- nlm(obj.one, p = guess.one))
+ #
+ # Second pass to estimate stutter allele parameters. We do
+ # not take into account wild peak allele data.
+ #
+ est <- nlmres.one$estimate
+ censored <- floor(est[2] - 6*est[3]) # cut at mu - 6 sigma
+ ytheo.two <- function(p, x) p[1]*dnorm(x, p[2], p[3])
+ used <- ( xseq < censored ) # TRUE for the datapoints before the cut
+ obj.two <- function(p) sum(( yseq[used] - ytheo.two(p, xseq[used]))^2)
+ guess.two <- numeric(3)
+ guess.two[1] <- datapointsigma*sqrt(2*pi)*max(yseq[used])
+ guess.two[2] <- xseq[which.max(yseq[used])] # index in yseq[used] applied to xseq: valid as used is a leading run of the increasing xseq
+ guess.two[3] <- datapointsigma
+ suppressWarnings(nlmres.two <- nlm(obj.two, p = guess.two))
+ est2 <- nlmres.two$estimate
+ #
+ # Fit a spline:
+ #
+ spfun <- splinefun(xseq, yseq, method = method)
+ xx <- seq(min(xseq), max(xseq), le = 500) # le partially matches length.out
+ yy <- spfun(xx) # only used by the commented-out lines() call below
+ #
+ # Compute output result:
+ # index 1 is for the stutter product and index 2 for the wild allele;
+ # both are measured on the spline within mean +/- pms standard deviations.
+ x1inf <- est2[2] - pms*est2[3]
+ x1sup <- est2[2] + pms*est2[3]
+ l1 <- optimize(spfun, interval = c(x1inf, x1sup), maximum = TRUE)$maximum # location of the maximum
+ h1 <- spfun(l1) # height at the maximum
+ s1 <- integrate(spfun, x1inf, x1sup)$value # surface under the spline
+ 
+ x2inf <- est[2] - pms*est[3]
+ x2sup <- est[2] + pms*est[3]
+ l2 <- optimize(spfun, interval = c(x2inf, x2sup), maximum = TRUE)$maximum
+ h2 <- spfun(l2)
+ s2 <- integrate(spfun, x2inf, x2sup)$value
+ 
+ rh <- h1/h2 # height ratio
+ rs <- s1/s2 # surface ratio
+
+ p <- list(p1 = est2[1], mu1 = est2[2], sd1 = est2[3],
+ p2 = est[1], mu2 = est[2], sd2 = est[3],
+ xseq = xseq, yseq = yseq)
+ if(fig){
+ plot(xseq, yseq,
+ main = paste("rh = ", round(rh, 5), "rs =", round(rs, 5)),
+ ylab = "RFU", las = 1,
+ xlab = "Time in datapoint units")
+ abline(h = 0, lty = 3)
+ abline(v = c(l1, l2), lty=2, col = c("red", "blue"))
+ abline(h = c(h1, h2), lty=2, col = c("red", "blue"))
+ 
+ #lines(xx, yy, col = "red")
+ #
+ # Stutter:
+ #
+ xx <- seq(x1inf, x1sup, le = 500)
+ yy <- spfun(xx)
+ lines(xx, yy, col = "red", lwd = 2)
+ #
+ # Wild Allele:
+ #
+ xx <- seq(x2inf, x2sup, le = 500)
+ yy <- spfun(xx)
+ lines(xx, yy, col = "blue", lwd = 2)
+ #
+ # legend:
+ #
+ legend("topleft", inset = 0.01, legend = c("Stutter product",
+ "Corresponding allele"), lty = 1, lwd = 2, col = c("red", "blue"),
+ bg = "white", title = "Datapoints used for:")
+ }
+ return(list(rh = rh, rs = rs, h1 = h1, h2 = h2, s1 = s1, s2 = s2, p = p))
+}
+
diff --git a/R/swap.R b/R/swap.R
new file mode 100644
index 0000000..f3e99fd
--- /dev/null
+++ b/R/swap.R
@@ -0,0 +1,9 @@
+swap <- function(x, y){ # exchange the values of two variables in the caller's frame
+ x.sub <- substitute(x) # unevaluated expression given as x
+ y.sub <- substitute(y) # unevaluated expression given as y
+ x.val <- x # value of x, saved before x is overwritten
+ e <- parent.frame() # the assignments below are evaluated in the calling frame
+ do.call("<-", list(x.sub, y.sub), envir = e) # x <- y
+ do.call("<-", list(y.sub, x.val), envir = e) # y <- saved value of x
+}
+
diff --git a/R/synonymous.R b/R/synonymous.R
new file mode 100644
index 0000000..2e6b52f
--- /dev/null
+++ b/R/synonymous.R
@@ -0,0 +1,74 @@
+syncodons <- function(codons, numcode = 1)
+{
+  #
+  # For each codon, return the names of all the codons translated into the
+  # same amino-acid with genetic code numcode, as a list named by codon.
+  #
+  if( any(nchar(codons) != 3)) stop("vector of three character string elements expected")
+  #
+  # Force to lower case when an upper case letter A-Z is found:
+  #
+  if(any(unlist(lapply(codons, s2c)) %in% LETTERS)) codons <- tolower(codons)
+  #
+  # Genetic code map: translation of every word returned by words(),
+  # named by word:
+  #
+  codemap <- sapply(words(), function(w) translate(s2c(w), numcode = numcode))
+  #
+  # Codons sharing the translation of a given codon:
+  #
+  synonymous <- lapply(codons, function(cdn) names(codemap[codemap == codemap[[cdn]]]))
+  names(synonymous) <- codons
+  return(synonymous)
+}
+
+synsequence <- function (sequence, numcode = 1, ucoweight = NULL)
+{
+    #
+    # Return a new sequence built from sequence: for each amino-acid of its
+    # translation, the codons are either shuffled among the positions where
+    # it occurs (ucoweight is NULL) or drawn at random from an urn built
+    # from ucoweight, a list of named codon weights indexed by amino-acid.
+    #
+    tra <- translate(sequence, numcode = numcode)
+    cod <- splitseq(sequence)
+    for (aa in unique(tra)) {
+        pos <- which(tra == aa)
+        if (is.null(ucoweight)) {
+            #
+            # Permute the codons found at the positions of this amino-acid:
+            #
+            if (length(pos) > 1) cod[pos] <- sample(cod[pos])
+            next
+        }
+        #
+        # Urn where each codon name is repeated weight * (number of
+        # positions) times, from which one codon per position is drawn:
+        #
+        urne <- rep(names(ucoweight[[aa]]), ucoweight[[aa]] * length(pos))
+        if (length(urne) == 0) {
+            print(aa)
+            stop("bad codon usage content")
+        }
+        cod[pos] <- if (length(urne) > 1) sample(urne, length(pos)) else urne
+    }
+    # Back to a vector of single characters:
+    #
+    return(s2c(c2s(cod)))
+}
+
+ucoweight <- function (sequence, numcode = 1)
+{
+    # Split the values of uco(sequence) by amino-acid symbol of genetic code
+    # numcode: a list named by symbol, each element holding the values for
+    # the codons translated into that symbol.
+    allaminos <- s2c(c2s(SEQINR.UTIL$CODES.NCBI$CODES[numcode]))
+    allcodons <- splitseq(as.vector(t(cbind(rep(s2c("tcag"), each = 16),
+        rep(s2c("tcag"), each = 4), rep(s2c("tcag"), 4)))))
+    usage <- uco(sequence)[allcodons] #re-order according to NCBI
+    # One entry per distinct symbol of the code rather than a fixed 21:
+    aminos <- unique(allaminos)
+    weight <- lapply(aminos, function(aa) usage[which(allaminos == aa)])
+    names(weight) <- aminos
+    return(weight)
+}
diff --git a/R/tablecode.R b/R/tablecode.R
new file mode 100644
index 0000000..77302cd
--- /dev/null
+++ b/R/tablecode.R
@@ -0,0 +1,147 @@
+#
+# Genetic code table as in Text Books
+#
+
+tablecode <- function(numcode = 1, urn.rna = s2c("TCAG"), dia = FALSE,
+latexfile = NULL, label = latexfile, size = "normalsize", caption = NULL,
+preaa = rep("", 64), postaa = rep("", 64),
+precodon = preaa, postcodon = postaa)
+{ # Show genetic code numcode as a LaTeX table (latexfile given) or as a plot.
+ aa1 <- a() # symbols matched against the output of translate() below
+ aa3 <- aaa() # corresponding labels shown in the table
+ codename <- SEQINR.UTIL$CODES.NCBI[numcode, "ORGANISMES"]
+ urn <- s2c("tcag") # internal
+#
+# Make default caption for LaTeX table:
+#
+ if( is.null(caption) ){
+ caption <- paste("\\caption{Genetic code number ",
+ numcode, ": ", codename, ".}", sep = "")
+ }
+#
+# As a LaTeX table written into latexfile; no plot is drawn in this case:
+#
+ if( ! is.null(latexfile) ) {
+ Tfile <- file(latexfile, open = "w")
+ cat("\\begin{table}", file = Tfile, sep = "
+")
+ cat("\\begin{center}", file = Tfile, sep = "
+")
+#
+# Character size:
+#
+ cat(paste("{\\", size, sep = ""), file = Tfile, sep = "
+")
+
+ cat("\\begin{tabular}{*{13}{l}}", file = Tfile, sep = "
+")
+ cat("\\hline", file = Tfile, sep = "
+")
+ cat("\\\\", file = Tfile, sep = "
+")
+
+ ncodon <- 1 # codon rank as in
+ #paste(paste(rep(s2c("tcag"), each = 16), s2c("tcag"), sep = ""), rep(s2c("tcag"), each = 4), sep = "")
+ for( i in 0:3 )
+ {
+ for( j in 0:3 )
+ {
+ for( k in 0:3 )
+ {
+ codon <- c(urn[i+1], urn[k+1], urn[j+1]) # innermost index k is the second base, j the third one
+ codon.urn <- paste(urn.rna[i+1], urn.rna[k+1], urn.rna[j+1], sep = "", collapse = "")
+ codon.urn <- paste(precodon[ncodon], codon.urn, postcodon[ncodon], sep = "")
+
+ aminoacid <- aa3[which(aa1 == translate(codon, numcode = numcode))]
+ aminoacid <- paste(preaa[ncodon], aminoacid, postaa[ncodon], sep = "")
+
+ cat(paste(codon.urn, aminoacid, " &", sep = " & "), file = Tfile, sep = "
+")
+ ncodon <- ncodon + 1
+ }
+ cat("\\\\", file = Tfile, sep = "
+")
+ }
+ cat("\\\\", file = Tfile, sep = "
+")
+ }
+ cat("\\hline", file = Tfile, sep = "
+")
+ cat("\\end{tabular}", file = Tfile, sep = "
+")
+#
+# Caption:
+#
+ cat(caption, file = Tfile, sep = "
+")
+#
+# LaTeX label:
+#
+ cat(paste("\\label{", label, "}", sep = ""), file = Tfile, sep = "
+")
+#
+# End character size:
+#
+ cat("}", file = Tfile, sep = "
+")
+
+ cat("\\end{center}", file = Tfile, sep = "
+")
+ cat("\\end{table}", file = Tfile, sep = "
+")
+ close(Tfile)
+ return(invisible(NULL))
+ }
+#
+# END LATEX
+# What follows is the plot version; dia = TRUE switches to yellow on blue:
+ if( dia )
+ {
+ op <- par(no.readonly = TRUE) # saved to be restored at the end
+ par(bg = "blue")
+ par(fg = "yellow")
+ par(col = "yellow")
+ par(col.axis = "yellow")
+ par(col.lab = "yellow")
+ par(col.main = "yellow")
+ par(col.sub = "yellow")
+ }
+
+ plot.new()
+ plot.window(xlim=c(0,100),ylim=c(0,100))
+
+ segments( 0, 102, 100, 102, lwd = 2)
+ segments( 0, 0, 100, 0, lwd = 2)
+ segments( 0, 97, 100, 97)
+
+ 
+ text(x=0, y = 98.5, font = 2, adj = c(0, 0),
+ lab = paste("Genetic code", numcode,":",codename))
+
+ urn <- c("t","c","a","g") # internal
+ for( i in 0:3 )
+ {
+ for( j in 0:3 )
+ {
+ for( k in 0:3 )
+ {
+ codon <- c(urn[i+1], urn[j+1], urn[k+1]) # here j is the second base (x position) and k the third one
+
+ text( x = 100*j/4, y = 95 - 100*i/4 -5*k, adj = c(-0.5,1.5),
+ lab = urn.rna[i+1] )
+
+ text( x = 100*j/4 + 3, y = 95 - 100*i/4 -5*k, adj = c(-0.5,1.5),
+ lab = urn.rna[j+1] )
+
+ text( x = 100*j/4 + 6, y = 95 - 100*i/4 -5*k, adj = c(-0.5,1.5),
+ lab = urn.rna[k+1] )
+
+ aminoacid <- aa3[which(aa1 == translate(codon, numcode = numcode))]
+ text( x = 100*j/4 + 12, y = 95 - 100*i/4 -5*k, adj = c(-0.5,1.5),
+ lab = aminoacid )
+ }
+ }
+ }
+ if(dia)
+ par(op) # restore the graphical parameters saved above
+}
diff --git a/R/test.co.recstat.R b/R/test.co.recstat.R
new file mode 100755
index 0000000..b9ceb3b
--- /dev/null
+++ b/R/test.co.recstat.R
@@ -0,0 +1,205 @@
+##
+# This function tests if a region located between two stop codons could be a putative CDS
+#
+# Data used are the factor scores of the CA computed on the windows by recstat function
+##
+#v.18.08.2011
+test.co.recstat <- function(rec, fac = 1, length.min = 150, stop.max = 0.2, win.lim = 0.8, direct = TRUE, level = 0.01)
+{
+  #
+  # For each of the three reading frames of one strand, list the regions
+  # lying between two consecutive stop codons that are longer than
+  # length.min, and flag each of them as a putative CDS (1) or not (0).
+  #
+  # Arguments:
+  #   rec        list produced by recstat(); its elements are read by position:
+  #              [[2]] window size, [[3]] shift between windows, [[4]] sequence
+  #              length, [[8]]/[[9]] stop codon positions, [[12]]/[[13]]
+  #              per-window codon counts, [[14]]/[[15]] CA result (its $co
+  #              component is used), for the direct/reverse strand.
+  #   fac        CA factor (column of $co) used as codon score, in 1:4.
+  #   length.min minimal distance between two stop codons to report a region.
+  #   stop.max   tolerated overhang of a window over a stop codon, as a
+  #              fraction of (window size + 2).
+  #   win.lim    fraction of windows that must be significant to flag a CDS.
+  #   direct     TRUE for the direct strand, FALSE for the reverse strand.
+  #   level      p-value threshold of the Kruskal-Wallis test.
+  #
+  # Value: a list of three matrices (one per reading frame) with columns
+  # "Start", "End" and "CDS"; NULL (after printing a message) when fac or the
+  # sequence length is not acceptable.
+  #
+  if (fac < 1 || fac > 4)
+  { # the message promises 1:4, so 0 must be rejected too
+    print("Factor number is not in 1:4.")
+    return()
+  }
+  sizewin <- rec[[2]]
+  shift <- rec[[3]]
+  seqsize <- rec[[4]]
+  if (seqsize < length.min)
+  {
+    print("Sequence length is shorter than minimum distance between two Stop codons.")
+    return()
+  }
+  #
+  # Pick the strand-specific inputs once instead of duplicating the code:
+  #
+  if (direct)
+  {
+    vstop <- rec[[8]]
+    res <- rec[[12]]
+    ca <- rec[[14]]
+  }
+  else
+  {
+    vstop <- rec[[9]]
+    res <- rec[[13]]
+    ca <- rec[[15]]
+  }
+  ncod <- sizewin/3 # number of codons in a complete window
+  #
+  # Stop codon positions of reading frame k, bracketed by the virtual start
+  # (k - 3) and end positions ("-3" and "-2" because table.recstat() adds
+  # 3 and 2 back when reporting Start and End):
+  #
+  frame.stops <- function(k)
+  {
+    phase <- vstop %% 3
+    phase[phase == 0] <- 3
+    bounds <- c(vstop[phase == k], k - 3, seqsize - ((seqsize - (k - 1)) %% 3) - 2)
+    sort(unique(bounds))
+  }
+  #
+  # One row of codon scores per complete window: the score of each of the
+  # 64 codons on factor fac is repeated as many times as the codon occurs.
+  #
+  scores <- unlist(lapply(seq_len(dim(res)[1]), function(i)
+  {
+    if (sum(res[i, ]) != ncod) return(NULL) # incomplete window, skipped
+    rep(ca$co[1:64, fac], times = as.numeric(unlist(res[i, 1:64])))
+  }))
+  if (is.null(scores)) scores <- numeric()
+  tab <- matrix(scores, byrow = TRUE, ncol = ncod)
+  #
+  # Kruskal-Wallis test between the three reading frames for each window
+  # rank; column 1 of pvalvec is the p-value, column 2 the window start.
+  #
+  seqisize <- floor(dim(tab)[1]/3)
+  phase <- as.factor(rep(1:3, each = ncod))
+  pvals <- vapply(seq_len(seqisize), function(i)
+  {
+    v <- c(tab[i, ], tab[seqisize + i, ], tab[2*seqisize + i, ])
+    kruskal.test(v ~ phase)$p.value
+  }, numeric(1))
+  pvalvec <- matrix(c(pvals, (seq_len(seqisize) - 1)*shift + 1), ncol = 2)
+  #
+  # Build the Start/End/CDS table for one vector of stop codon positions:
+  #
+  table.recstat <- function(vstop)
+  {
+    tabCDS <- numeric()
+    for (i in seq_along(vstop)[-1])
+    {
+      if ((vstop[i] - vstop[i - 1]) > length.min)
+      { # space between the two stop codons is above the threshold
+        inside <- which((vstop[i - 1] - pvalvec[, 2])/(sizewin + 2) <= stop.max &
+          (vstop[i] - pvalvec[, 2])/(sizewin + 2) >= (1 - stop.max))
+        seg <- pvalvec[inside, 1]
+        # no window at all, or undefined p-values, count as "not significant"
+        nsig <- sum(seg < level, na.rm = TRUE)
+        result <- if (length(seg) > 0 && nsig/length(seg) > win.lim) 1 else 0
+        tabCDS <- c(tabCDS, vstop[i - 1] + 3, vstop[i] + 2, result)
+      }
+    }
+    tabCDS <- matrix(tabCDS, ncol = 3, byrow = TRUE)
+    colnames(tabCDS) <- c("Start", "End", "CDS")
+    tabCDS
+  }
+  return(lapply(1:3, function(k) table.recstat(frame.stops(k))))
+}
\ No newline at end of file
diff --git a/R/test.li.recstat.R b/R/test.li.recstat.R
new file mode 100755
index 0000000..ae08b38
--- /dev/null
+++ b/R/test.li.recstat.R
@@ -0,0 +1,180 @@
+##
+# This function tests if a region located between two stop codons could be a putative CDS
+#
+# Data used are the factor scores of the CA computed on the windows by recstat function
+##
+#v.18.08.2011
+test.li.recstat <- function(rec, fac = 1, length.min = 150, stop.max = 0.2, direct = TRUE, level = 0.05)
+{
+  #
+  # For each of the three reading frames of one strand, list the regions
+  # lying between two consecutive stop codons that are longer than
+  # length.min and compare, with two t-tests, the window scores of the frame
+  # under study with those of the two other frames.
+  #
+  # Arguments:
+  #   rec        list produced by recstat(); its elements are read by position:
+  #              [[2]] window size, [[4]] sequence length, [[6]] window start
+  #              positions, [[8]]/[[9]] stop codon positions and [[14]]/[[15]]
+  #              CA result (its $li component is used) for the direct/reverse
+  #              strand.
+  #   fac        CA factor (column of $li) used as window score, in 1:4.
+  #   length.min minimal distance between two stop codons to report a region.
+  #   stop.max   tolerated overhang of a window over a stop codon, as a
+  #              fraction of the window size.
+  #   direct     TRUE for the direct strand, FALSE for the reverse strand.
+  #   level      p-value threshold of the t-tests.
+  #
+  # Value: a list of three matrices (one per reading frame) with columns
+  # Start, End, the mean score in each frame, the two p-values and the CDS
+  # flag (1 when both tests are significant); NULL (after printing a message)
+  # when fac or the sequence length is not acceptable.
+  #
+  if (fac < 1 || fac > 4)
+  { # the message promises 1:4, so 0 must be rejected too
+    print("Factor number is not in 1:4.")
+    return()
+  }
+  sizewin <- rec[[2]]
+  seqsize <- rec[[4]]
+  # NOTE(review): rec[[7]] was read as "vind" by the previous version but never
+  # used; window starts come from rec[[6]] for both strands -- confirm that
+  # this is intended for the reverse strand.
+  vdep <- rec[[6]]
+  if (seqsize < length.min)
+  {
+    print("Sequence length is shorter than minimum distance between two Stop codons.")
+    return()
+  }
+  #
+  # Number of windows by reading frame: the row count of the direct strand
+  # CA is split in three, the remainder going to frame 1 then frame 2.
+  #
+  nwin <- dim(rec[[14]]$li)[1]
+  seqisize <- floor(nwin/3)
+  seqisize1 <- seqisize + (nwin %% 3 >= 1)
+  seqisize2 <- seqisize + (nwin %% 3 == 2)
+  frame.rows <- list(1:seqisize1,
+    (seqisize1 + 1):(seqisize1 + seqisize2),
+    (seqisize1 + seqisize2 + 1):(length(vdep)))
+  #
+  # Pick the strand-specific inputs once instead of duplicating the code:
+  #
+  if (direct)
+  {
+    vstop <- rec[[8]]
+    ca <- rec[[14]]
+  }
+  else
+  {
+    vstop <- rec[[9]]
+    ca <- rec[[15]]
+  }
+  #
+  # Stop codon positions of reading frame k, bracketed by the virtual start
+  # (k - 3) and end positions ("-3" and "-2" because table.recstat() adds
+  # 3 and 2 back when reporting Start and End):
+  #
+  frame.stops <- function(k)
+  {
+    phase <- vstop %% 3
+    phase[phase == 0] <- 3
+    bounds <- c(vstop[phase == k], k - 3, seqsize - ((seqsize - (k - 1)) %% 3) - 2)
+    sort(unique(bounds))
+  }
+  #
+  # Scores on factor fac of the windows of one frame (rows) lying between
+  # the stop codons at positions lo and hi:
+  #
+  window.scores <- function(lo, hi, rows)
+  {
+    starts <- vdep[rows]
+    hit <- which((lo - starts)/sizewin <= stop.max & (hi - starts)/sizewin >= (1 - stop.max))
+    ca$li[hit + rows[1] - 1, fac]
+  }
+  #
+  # p-value of the t-test, NA when the test cannot be computed (for instance
+  # fewer than two windows in a segment) so that one short region does not
+  # abort the whole scan:
+  #
+  pval <- function(x, y)
+  {
+    tryCatch(t.test(x, y)$p.value, error = function(e) NA_real_)
+  }
+  #
+  # Build the result table for one reading frame:
+  #
+  table.recstat <- function(vstop, frame)
+  {
+    others <- setdiff(1:3, frame) # the two other frames, in increasing order
+    tabCDS <- numeric()
+    for (i in seq_along(vstop)[-1])
+    {
+      if ((vstop[i] - vstop[i - 1]) > length.min)
+      { # space between the two stop codons is above the threshold
+        segs <- lapply(frame.rows, function(rows) window.scores(vstop[i - 1], vstop[i], rows))
+        test1 <- pval(segs[[frame]], segs[[others[1]]])
+        test2 <- pval(segs[[frame]], segs[[others[2]]])
+        result <- if (isTRUE(test1 < level) && isTRUE(test2 < level)) 1 else 0
+        tabCDS <- c(tabCDS, vstop[i - 1] + 3, vstop[i] + 2,
+          mean(segs[[1]]), mean(segs[[2]]), mean(segs[[3]]), test1, test2, result)
+      }
+    }
+    tabCDS <- matrix(tabCDS, ncol = 8, byrow = TRUE)
+    colnames(tabCDS) <- c("Start", "End", "Mean 1", "Mean 2", "Mean 3",
+      paste("t(", frame, ",", others, ")", sep = ""), "CDS")
+    tabCDS
+  }
+  return(lapply(1:3, function(k) table.recstat(frame.stops(k), k)))
+}
+
diff --git a/R/translate.R b/R/translate.R
new file mode 100755
index 0000000..ea7766f
--- /dev/null
+++ b/R/translate.R
@@ -0,0 +1,74 @@
+translate <- function(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X",
+ambiguous = FALSE)
+{
+  #
+  # Translate a nucleic acid sequence into amino-acids.
+  #
+  # seq:       vector of single characters (upper case is converted to lower case)
+  # frame:     number of leading bases skipped before the first codon
+  # sens:      "R" to translate the reverse complementary strand
+  # numcode:   index of the genetic code in SEQINR.UTIL$CODES.NCBI$CODES
+  # NAstring:  string used for codons that cannot be translated
+  # ambiguous: if TRUE, codons with ambiguous bases are translated when all
+  #            the codons they stand for give the same amino-acid
+  #
+  # Value: a vector of one-letter amino-acids, one per complete codon;
+  # character(0) when there is no complete codon.
+  #
+
+  if(any(seq%in%LETTERS)){
+    seq <- tolower(seq)
+  }
+  #
+  # Take the reverse complementary strand when required:
+  #
+
+  if(sens == "R") seq <- comp(rev(seq), ambiguous = ambiguous)
+
+  #
+  # Compute the length of the sequence when its length in codons
+  # is an integer:
+  #
+
+  l <- 3*((length(seq) - frame) %/% 3)
+
+  #
+  # Without a single complete codon seq(from, to, by = 3) below would stop
+  # with "wrong sign in 'by' argument": there is nothing to translate.
+  #
+
+  if(l < 3) return(character(0))
+
+  #
+  # Transform the sequence in its numerical encoding equivalent
+  # with textbook order, that is t = 0, c = 1, a = 2, g = 3
+  #
+
+  seqn <- s2n(seq, levels = s2c("tcag"))
+
+  #
+  # Compute the indices for the first codon positions:
+  #
+
+  c1 <- seq(from = frame + 1, to = frame + l, by = 3)
+
+  #
+  # Compute the indices of codons in the translation table:
+  #
+
+  tra <- 16*seqn[c1] + 4*seqn[c1 + 1] + seqn[c1 + 2] + 1
+
+  #
+  # Get the translation table:
+  #
+
+  code <- s2c(SEQINR.UTIL$CODES.NCBI$CODES[numcode])
+
+  #
+  # Translate the sequence:
+  #
+
+  result <- code[tra]
+
+  #
+  # Replace missing values by the string for missing amino-acids:
+  #
+
+  result[is.na(result)] <- NAstring
+
+  #
+  # More work is required if ambiguous bases are handled: every codon that
+  # could not be translated is expanded into all the codons it stands for
+  # and is kept only when they all code for the same amino-acid.
+  #
+
+  if(ambiguous){
+    toCheck <- which(result == NAstring)
+    for( i in toCheck ){
+      codon <- seq[c1[i]:(c1[i]+2)]
+      allcodons <- as.vector(outer(as.vector(outer(amb(codon[1]), amb(codon[2]), paste, sep = "")), amb(codon[3]), paste, sep = ""))
+      allaminoacids <- sapply(allcodons, function(x) translate(s2c(x), numcode = numcode, ambiguous = FALSE))
+      if( all(allaminoacids == allaminoacids[1])) result[i] <- allaminoacids[1]
+    }
+  }
+
+  return( result )
+}
+
diff --git a/R/trimSpace.R b/R/trimSpace.R
new file mode 100644
index 0000000..7b22141
--- /dev/null
+++ b/R/trimSpace.R
@@ -0,0 +1,11 @@
+trimSpace <- function(x, leading = TRUE, trailing = TRUE, space = "[:space:]"){
+  #
+  # Remove the characters of class `space` found at the start (leading)
+  # and/or at the end (trailing) of each string in x.
+  #
+  if(leading){
+    atStart <- paste("^[", space, "]*", sep = "", collapse = "")
+    x <- sub(atStart, "", x)
+  }
+  if(trailing){
+    atEnd <- paste("[", space, "]*$", sep = "", collapse = "")
+    x <- sub(atEnd, "", x)
+  }
+  x
+}
diff --git a/R/uco.R b/R/uco.R
new file mode 100755
index 0000000..4eca45e
--- /dev/null
+++ b/R/uco.R
@@ -0,0 +1,121 @@
+uco <- function (seq, frame = 0, index = c("eff", "freq", "rscu"),
+as.data.frame = FALSE, NA.rscu = NA)
+{
+  #
+  # Codon usage indices of a coding sequence: counts ("eff"), relative
+  # frequencies ("freq") or relative synonymous codon usage ("rscu").
+  # With as.data.frame the three indices are returned together in a
+  # data.frame, otherwise only the one selected by index is returned.
+  # Non-finite RSCU values are replaced by NA.rscu.
+  #
+  choice <- match.arg(index)
+
+  if(any(seq%in%LETTERS)){
+    seq <- tolower(seq)
+  }
+  codons <- splitseq(seq = seq, frame = frame, word = 3)
+  vectorOnly <- as.data.frame == FALSE
+
+  eff <- table(factor(codons, levels = SEQINR.UTIL$CODON.AA$CODON))
+  if(vectorOnly){
+    if(choice == "eff") return(eff)
+  }
+
+  freq <- eff/(floor(length(seq)/3))
+  if(vectorOnly){
+    if(choice == "freq") return(freq)
+  }
+
+  #
+  # RSCU: frequency of a codon divided by the mean frequency of the
+  # codons coding for the same amino-acid.
+  #
+  byAA <- split(freq, SEQINR.UTIL$CODON.AA$AA)
+  rscu <- lapply(byAA, function(x) x/((1/length(x)) * sum(x)))
+  names(rscu) <- NULL
+  rscu <- unlist(rscu)[as.character(SEQINR.UTIL$CODON.AA$CODON)]
+  rscu[!is.finite(rscu)] <- NA
+  rscu[is.na(rscu)] <- NA.rscu
+  if(vectorOnly) return(rscu)
+
+  # return all indices in a data.frame
+  df <- data.frame(SEQINR.UTIL$CODON.AA$AA, eff = eff, freq = as.vector(freq), RSCU = rscu)
+  names(df) <- c("AA", "codon", "eff", "freq", "RSCU")
+  return(df)
+}
+
+
+dotchart.uco <- function(x, numcode = 1, aa3 = TRUE, cex = 0.7,
+  alphabet = s2c("tcag"), pch = 21, gpch = 20, bg = par("bg"),
+  color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlim, ...)
+{
+  #
+  # Cleveland dot plot of a codon usage table, codons being grouped by the
+  # amino-acid they code for (genetic code numcode) and groups being sorted
+  # by their total.
+  #
+  # x:    values for the codons; when x has no names they are set to
+  #       words(alphabet = alphabet)
+  # aa3:  if TRUE groups are labelled with the three-letter code
+  # xlim: x-axis limits, c(0, max of the group totals) when missing
+  # the other arguments are passed to dotchart()
+  #
+  # Value (invisible): a list with the sorted values (x), their labels, the
+  # groups, the group totals (gdata) and the y coordinates of the groups
+  # (ypg) and of the items (ypi) for further plots.
+  #
+  if( is.null(names(x)) ) names(x) <- words( alphabet = alphabet )
+#
+# General sorting
+#
+  x <- sort(x)
+  labels <- names(x)
+  stringlabel <- paste(labels, sep="", collapse="")
+  groups <- as.factor(translate(s2c(stringlabel), numcode = numcode))
+  gdata <- sapply(split(x, groups), sum)
+#
+# Now, sorting by aa order: the rank of the group total is the main key,
+# the small negative term keeps larger values first inside a group.
+#
+  gordered <- rank(gdata)
+  xidx <- -0.01*seq_along(x) + gordered[groups]
+
+  x <- x[order(xidx)]
+  labels <- names(x)
+  stringlabel <- paste(labels, sep="", collapse="")
+  aa <- translate(s2c(stringlabel), numcode = numcode)
+  groups <- factor(aa, levels = unique(aa))
+  gdata <- sapply(split(x, groups), sum)
+
+  if( missing(xlim) ) xlim <- c(0, max(gdata))
+  if( aa3 )
+  {
+    levels(groups) <- aaa(levels(groups))
+  }
+  # xlim must be passed by name: given positionally it is bound to the first
+  # formal argument of dotchart() not matched by name, which is not xlim.
+  dotchart(x = x, labels = labels, groups = groups, gdata = gdata,
+    cex = cex, pch = pch, gpch = gpch, bg = bg, color = color,
+    gcolor = gcolor, lcolor = lcolor, xlim = xlim, ...)
+#
+# Return invisibly for further plots
+#
+  result <- list(0) # first unnamed element kept for backward compatibility
+  result$x <- x
+  result$labels <- labels
+  result$groups <- groups
+  result$gdata <- gdata
+
+  # each group takes one line per item plus two lines
+  ypg <- as.vector(table(groups)) + 2
+  ypg <- rev(cumsum(rev(ypg))) - 1
+  names(ypg) <- levels(groups)
+  result$ypg <- ypg
+
+  # y of an item: y of its group minus its reversed rank inside the group
+  ypi <- unname(ypg[groups])
+  ypi <- ypi - unlist(lapply(split(x, groups), function(g) rev(seq_along(g))))
+  names(ypi) <- labels
+  result$ypi <- ypi
+
+  return( invisible(result) )
+}
+
+
+
diff --git a/R/util.R b/R/util.R
new file mode 100644
index 0000000..0948c6b
--- /dev/null
+++ b/R/util.R
@@ -0,0 +1,93 @@
+########################
+# char to string
+########################
+
+c2s <- function( chars = c("m","e","r","g","e","d") )
+{
+  # Glue a vector of characters into one single string.
+  merged <- paste( chars, collapse = "" )
+  merged
+}
+
+###########################
+# string to char
+############################
+
+s2c <- function (string)
+{
+  # Split one single string into a vector of characters (done in C);
+  # anything else than a character vector of length one gives NA with a warning.
+  valid <- is.character(string) && length(string) == 1
+  if(!valid){
+    warning("Wrong argument type in s2c(), NA returned")
+    return(NA)
+  }
+  .Call("s2c", string, PACKAGE = "seqinr")
+}
+
+
+
+###########################
+# Conversion of the numeric encoding of a DNA sequence into
+# a vector of chars
+############################
+
+n2s <- function(nseq, levels = c("a", "c", "g", "t"), base4 = TRUE)
+{
+  # With base4 the codes start at 0 and are shifted by one to index levels,
+  # otherwise they are used as indices as they are.
+  idx <- if( base4 ) nseq + 1 else nseq
+  levels[idx]
+}
+
+
+
+##########################################
+# Convert one-letter code to 3-letters code for amino-acids
+##########################################
+
+aaa <- function( aa )
+{
+  # Without argument the vector of the three-letter codes is returned,
+  # otherwise each one-letter code in aa is converted (NA with a warning
+  # for an unknown code).
+  aa3 <- c("Stp", "Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
+    "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
+    "Val", "Trp", "Tyr") # One letter code order
+  if(missing(aa)) return(aa3)
+  aa1 <- a()
+
+  convert <- function( x )
+  {
+    hit <- x == aa1
+    if( !any(hit) )
+    {
+      warning("Unknown one letter code for aminoacid")
+      return( NA )
+    }
+    aa3[which(hit)]
+  }
+  converted <- lapply( aa, convert )
+  return( as.vector(unlist(converted)) )
+}
+
+##########################################
+# Conversion 3-letters code to one letter code for amino-acids
+##########################################
+
+a <- function( aa )
+{
+  # Without argument the vector of the one-letter codes is returned,
+  # otherwise each three-letter code in aa is converted (NA with a warning
+  # for an unknown code).
+  aa1 <- s2c("*ACDEFGHIKLMNPQRSTVWY")
+  if(missing(aa)) return(aa1)
+  aa3 <- aaa()
+
+  convert <- function( x )
+  {
+    hit <- x == aa3
+    if( !any(hit) )
+    {
+      warning("Unknown 3-letters code for aminoacid")
+      return( NA )
+    }
+    aa1[which(hit)]
+  }
+  converted <- lapply( aa, convert )
+  return( as.vector(unlist(converted)) )
+}
+
+
diff --git a/R/where.is.this.acc.R b/R/where.is.this.acc.R
new file mode 100644
index 0000000..0a70318
--- /dev/null
+++ b/R/where.is.this.acc.R
@@ -0,0 +1,55 @@
+where.is.this.acc <- function(acc, stopAtFirst = TRUE, ...){
+  #
+  # Look for the ACNUC databases in which a sequence with accession number
+  # acc is present. The list of banks is obtained from choosebank(...), then
+  # each bank is opened in turn and queried with "AC= acc".
+  #
+  # acc:         the accession number, as a string
+  # stopAtFirst: if TRUE return as soon as the sequence is found in one bank
+  # ...:         arguments passed to choosebank() to list the banks
+  #
+  # Value (invisible): the names of the banks in which acc was found.
+  #
+  # Argument check:
+  #
+  if(!is.character(acc)) stop("string expected for argument acc")
+  #
+  result <- character(0)
+  #
+  cat("Looking for available databases\n")
+  banks <- choosebank(...)
+  nbanks <- length(banks)
+  cat(paste("Looking for sequence with accession number", acc,
+    "in the following ACNUC databases:\n"))
+  print(banks)
+  #
+  # Looping over banks:
+  #
+  for(i in seq_len(nbanks)){
+    cat(paste("\nTrying to open bank with name --->", banks[i],
+      "<--- ...", sep = ""))
+    bkopenres <- try(choosebank(banks[i]))
+    if(inherits(bkopenres, "try-error")){
+      cat("... opening not OK, skipping this bank.\n")
+    } else {
+      cat("... and opening was OK.\n")
+      cat(paste("==> Trying to find sequence", acc, "in bank",
+        banks[i], "..."))
+      # a failing query is taken as "not in this bank"
+      resquery <- try(query(".tmpquery", paste("AC=", acc)), silent = TRUE)
+      if(inherits(resquery, "try-error")){
+        cat("... not found here.\n")
+      } else {
+        cat("... *** FOUND *** here.\n")
+        result <- c(result, banks[i])
+      }
+      closebank()
+      if(length(result) != 0 && stopAtFirst){
+        return(invisible(result))
+      }
+    }
+  }
+  #
+  # Print result summary:
+  #
+  cat("\n\n")
+  if(length(result) == 0){
+    cat(paste("Sequence with accession number", acc,
+      "was not found in available databases.\n Are you sure this is an accession number and not a sequence name?"))
+  } else {
+    cat(paste("Sequence with accession number", acc,
+      "was found in the following database(s):\n"))
+    print(result)
+  }
+  invisible(result)
+}
+
diff --git a/R/words.R b/R/words.R
new file mode 100644
index 0000000..224103d
--- /dev/null
+++ b/R/words.R
@@ -0,0 +1,7 @@
+words <- function(length = 3, alphabet = s2c("acgt") )
+{
+  # All the words of a given length over alphabet: each letter is pasted in
+  # front of every word that is one letter shorter.
+  if( length != 1 )
+    return( kronecker( alphabet, words(length - 1, alphabet ), paste, sep = "") )
+  alphabet
+}
diff --git a/R/words.pos.R b/R/words.pos.R
new file mode 100644
index 0000000..451c223
--- /dev/null
+++ b/R/words.pos.R
@@ -0,0 +1,14 @@
+words.pos <- function(pattern, text, ignore.case = FALSE,
+  perl = TRUE, fixed = FALSE, useBytes = TRUE, ...)
+{
+  # Positions of all the matches of pattern in text, overlapping matches
+  # included: the text is searched again from the character following the
+  # start of the previous match, so that each match position is relative to
+  # the previous one and the cumulated sum gives the absolute positions.
+  offsets <- numeric(0)
+  repeat
+  {
+    hit <- regexpr(pattern, text, ignore.case, perl, fixed, useBytes, ...)[1]
+    if( hit == -1 ) break
+    offsets <- c(offsets, hit )
+    text <- substr(text, hit + 1, nchar(text))
+  }
+  return(cumsum(offsets))
+}
+
diff --git a/R/write.fasta.R b/R/write.fasta.R
new file mode 100644
index 0000000..10646aa
--- /dev/null
+++ b/R/write.fasta.R
@@ -0,0 +1,39 @@
+write.fasta <- function(sequences, names, file.out, open = "w", nbchar = 60,
+  as.string = FALSE){
+  #
+  # Write one sequence, or a list of sequences, in a file in FASTA format.
+  #
+  # sequences: one sequence or a list of sequences, each being a vector of
+  #            single characters (or one string when as.string is TRUE)
+  # names:     the name(s) written after ">" on the header line(s)
+  # file.out:  description of the output file, open: mode used to open it
+  # nbchar:    number of characters by line of sequence
+  #
+  # Value: NULL, invisibly.
+  #
+  # Open output file, and make sure that it is closed whatever happens
+  # (a failure while writing used to leave the connection open):
+  #
+  outfile <- file(description = file.out, open = open)
+  on.exit(close(outfile))
+
+  #
+  # Function to write one sequence in output file:
+  #
+  write.oneseq <- function(sequence, name, nbchar, as.string){
+    writeLines(paste(">", name, sep = ""), outfile)
+    if(as.string) sequence <- s2c(sequence)
+    l <- length(sequence)
+    q <- floor(l/nbchar) # number of complete lines
+    r <- l - nbchar*q    # number of characters on the last, incomplete, line
+    for(x in seq_len(q)){
+      writeLines(c2s(sequence[(nbchar*(x - 1) + 1):(nbchar*x)]), outfile)
+    }
+    if(r > 0){
+      writeLines(c2s(sequence[(nbchar*q + 1):l]), outfile)
+    }
+  }
+
+  #
+  # Write all sequences in output file:
+  #
+  if(!is.list(sequences)){
+    write.oneseq(sequence = sequences, name = names, nbchar = nbchar, as.string = as.string)
+  } else {
+    for(x in seq_len(length(sequences))){
+      write.oneseq(sequence = as.character(sequences[[x]]),
+        name = names[x], nbchar = nbchar, as.string = as.string)
+    }
+  }
+  #
+  # The output file is closed by on.exit():
+  #
+  invisible(NULL)
+}
diff --git a/R/zscore.R b/R/zscore.R
new file mode 100755
index 0000000..e1b3d56
--- /dev/null
+++ b/R/zscore.R
@@ -0,0 +1,63 @@
+
+#
+# Standardized score (z-score) of the over- or under-representation of
+# dinucleotides in a sequence.
+#
+# sequence:    the sequence, passed to count(), splitseq(), rho() and permutation()
+# simulations: NULL to use an analytical solution, otherwise the number of
+#              permuted sequences used to estimate the mean and the variance
+# modele:      "base" or "codon" for the analytical solution (anything else
+#              stops with an error); passed to permutation() otherwise
+# exact:       with modele "base", use the expectation and variance computed
+#              from the counts instead of the approximation from the frequencies
+# alphabet:    passed to count() with modele "base"
+# ...:         passed to permutation()
+#
+# Value: the z-scores, one for each dinucleotide.
+#
+zscore <- function (sequence, simulations = NULL, modele, exact = FALSE, alphabet = s2c("acgt"), ... ){
+ if (is.null(simulations)){
+ if (modele=="base"){
+ # rho = observed dinucleotide frequency over the product of the
+ # frequencies of its two letters
+ # NOTE(review): rep(uni, 4) presumably assumes a 4-letter alphabet -- confirm
+ uni <- count(sequence, 1, freq = TRUE, alphabet = alphabet)
+ di <- count(sequence, 2, freq = TRUE, alphabet = alphabet)
+ rep1 <- rep(uni, 4)
+ rep2 <- rep(uni, each = 4)
+ rho <- di/(rep1*rep2)
+ if (exact==FALSE){
+ zscore <- ((rho-1)/sqrt(((1-rep1)*(1-rep2))/((length(sequence))*rep1*rep2)))
+ }
+ else if (exact==TRUE){
+ # same quantities again, but as counts (freq = FALSE); rho above is kept
+ rm(uni,di,rep1,rep2)
+ uni <- count(sequence, 1, freq = FALSE, alphabet = alphabet)
+ di <- count(sequence, 2, freq = FALSE, alphabet = alphabet)
+ rep1 <- rep(uni, 4)
+ rep2 <- rep(uni, each = 4)
+ n <- length(sequence)
+ # expectation e: n/(n-1) by default, overwritten below at positions
+ # 1, 6, 11 and 16 (dinucleotides made of twice the same letter)
+ e <- c(rep(c(0,rep(n/(n-1),4)),3),0)
+ for (i in seq(1,4)){
+ e[1+5*(i-1)] <- (n*(uni[i]-1))/(uni[i]*(n-1))
+ }
+ # variance v: one formula for positions 1, 6, 11 and 16, another one
+ # for the other dinucleotides
+ v=NULL
+ for (i in seq(1,16)){
+ if (i==1 | i==6 | i==11 | i==16){
+ v[i] <- ((n^2*(rep1[i]-1))/(rep1[i]^2*(n-1)^2))*(1-rep1[i]+(n/rep1[i])*(1+2*(rep1[i]-2)/(n-1)+((n-4)*(rep1[i]-2)*(rep1[i]-3))/((n-2)*(n-3))))
+ }
+ else{
+ v[i] <- ((n^3)/((n-1)^2*rep1[i]*rep2[i]-n^2/(n-1)^2+(n^3*(n-4)*(rep1[i]-1)*(rep2[i]-1))/(rep1[i]*rep2[i]*(n-2)*(n-3)*(n-1)^2)))
+ }
+ }
+ zscore <- ((rho-e)/sqrt(v))
+ }
+ }
+ else if (modele=="codon"){
+ # xy: counts of the dinucleotides made of the third position of a codon
+ # and the first position of the next one
+ split <- splitseq(sequence)
+ n <- length(split)
+ pos <- sort(c(seq(3,length(sequence)-1,by=3),seq(4,length(sequence),by=3))) #position3-1 codons
+ xy <- table(factor(splitseq(sequence[pos],word=2),levels=words(2)))
+ # n1: codons with a given letter in third position, n2: in first position,
+ # n3: codons with given letters in both first and third positions
+ n1 <- sapply(s2c('acgt'), function(x){length(which(substring(split,3,3)==x))})
+ n1 <- rep(n1,each=4)
+ n2 <- sapply(s2c('acgt'), function(x){length(which(substring(split,1,1)==x))})
+ n2 <- rep(n2,4)
+ n3 <- sapply(s2c('acgt'), function (x){sapply(s2c('acgt'), function(y){length(which(substring(split,1,1)==y & substring(split,3,3)==x))})})
+ n3 <- as.vector(n3)
+ e <- (n1*n2-n3)/n
+ v <- (e+((1/(n*(n-1)))*(2*n3*(n1+n2-n1*n2-1)+n1*n2*(n1-1)*(n2-1))))-e^2
+ zscore <- ((xy-e)/sqrt(v))
+ }
+ else{
+ stop("analytical solution not implemented for this model")
+ }
+ }
+ else { #simulations = number of iterations
+ # rho() is computed on each permuted sequence (one column by permutation),
+ # then rho(sequence) is centered and scaled with the mean and the variance
+ # of each row; the alphabet argument is not passed to rho() here
+ rhopermut <- sapply(seq(simulations),function(x){rho(permutation(sequence =
+ sequence, modele = modele, ...))})
+ mean <- sapply(seq(dim(rhopermut)[1]),function(x){mean(rhopermut[x,])})
+ var <- sapply(seq(dim(rhopermut)[1]),function(x){var(rhopermut[x,])})
+ zscore <- ((rho(sequence)-mean)/sqrt(var))
+ }
+ return(zscore)
+}
diff --git a/data/AnoukResult.RData b/data/AnoukResult.RData
new file mode 100644
index 0000000..7842380
Binary files /dev/null and b/data/AnoukResult.RData differ
diff --git a/data/ECH.RData b/data/ECH.RData
new file mode 100644
index 0000000..b0cbd74
Binary files /dev/null and b/data/ECH.RData differ
diff --git a/data/EXP.RData b/data/EXP.RData
new file mode 100644
index 0000000..859d313
Binary files /dev/null and b/data/EXP.RData differ
diff --git a/data/JLO.RData b/data/JLO.RData
new file mode 100644
index 0000000..452be8e
Binary files /dev/null and b/data/JLO.RData differ
diff --git a/data/SEQINR.UTIL.RData b/data/SEQINR.UTIL.RData
new file mode 100644
index 0000000..df81124
Binary files /dev/null and b/data/SEQINR.UTIL.RData differ
diff --git a/data/aacost.RData b/data/aacost.RData
new file mode 100644
index 0000000..3e14ea2
Binary files /dev/null and b/data/aacost.RData differ
diff --git a/data/aaindex.RData b/data/aaindex.RData
new file mode 100644
index 0000000..4239ca6
Binary files /dev/null and b/data/aaindex.RData differ
diff --git a/data/caitab.RData b/data/caitab.RData
new file mode 100644
index 0000000..101f9e8
Binary files /dev/null and b/data/caitab.RData differ
diff --git a/data/chargaff.RData b/data/chargaff.RData
new file mode 100644
index 0000000..de8a70c
Binary files /dev/null and b/data/chargaff.RData differ
diff --git a/data/clustal.RData b/data/clustal.RData
new file mode 100644
index 0000000..8d65c4f
Binary files /dev/null and b/data/clustal.RData differ
diff --git a/data/datalist b/data/datalist
new file mode 100644
index 0000000..6d53430
--- /dev/null
+++ b/data/datalist
@@ -0,0 +1,26 @@
+AnoukResult
+ECH
+EXP
+JLO
+SEQINR.UTIL
+aacost
+aaindex
+caitab
+chargaff
+clustal
+dinucl
+ec999
+fasta
+gs500liz
+identifiler
+m16j
+mase
+msf
+pK
+phylip
+prochlo
+revaligntest
+sysdata: SEQINR.UTIL
+toyaa
+toycodon
+waterabs
diff --git a/data/dinucl.RData b/data/dinucl.RData
new file mode 100644
index 0000000..7d4b56c
Binary files /dev/null and b/data/dinucl.RData differ
diff --git a/data/ec999.RData b/data/ec999.RData
new file mode 100644
index 0000000..f6fbb8e
Binary files /dev/null and b/data/ec999.RData differ
diff --git a/data/fasta.RData b/data/fasta.RData
new file mode 100644
index 0000000..0624d1c
Binary files /dev/null and b/data/fasta.RData differ
diff --git a/data/gcO2.rda b/data/gcO2.rda
new file mode 100644
index 0000000..4d5a7c3
Binary files /dev/null and b/data/gcO2.rda differ
diff --git a/data/gcT.rda b/data/gcT.rda
new file mode 100644
index 0000000..a8255ba
Binary files /dev/null and b/data/gcT.rda differ
diff --git a/data/gs500liz.RData b/data/gs500liz.RData
new file mode 100644
index 0000000..9b49439
Binary files /dev/null and b/data/gs500liz.RData differ
diff --git a/data/identifiler.RData b/data/identifiler.RData
new file mode 100644
index 0000000..054fd9c
Binary files /dev/null and b/data/identifiler.RData differ
diff --git a/data/m16j.RData b/data/m16j.RData
new file mode 100644
index 0000000..6de552d
Binary files /dev/null and b/data/m16j.RData differ
diff --git a/data/mase.RData b/data/mase.RData
new file mode 100644
index 0000000..63291fd
Binary files /dev/null and b/data/mase.RData differ
diff --git a/data/msf.RData b/data/msf.RData
new file mode 100644
index 0000000..4b2771e
Binary files /dev/null and b/data/msf.RData differ
diff --git a/data/pK.RData b/data/pK.RData
new file mode 100644
index 0000000..b2841b7
Binary files /dev/null and b/data/pK.RData differ
diff --git a/data/phylip.RData b/data/phylip.RData
new file mode 100644
index 0000000..5fcca0d
Binary files /dev/null and b/data/phylip.RData differ
diff --git a/data/prochlo.RData b/data/prochlo.RData
new file mode 100644
index 0000000..2b8d9c6
Binary files /dev/null and b/data/prochlo.RData differ
diff --git a/data/revaligntest.RData b/data/revaligntest.RData
new file mode 100644
index 0000000..24850e2
Binary files /dev/null and b/data/revaligntest.RData differ
diff --git a/data/sysdata.rda b/data/sysdata.rda
new file mode 100644
index 0000000..df81124
Binary files /dev/null and b/data/sysdata.rda differ
diff --git a/data/toyaa.RData b/data/toyaa.RData
new file mode 100644
index 0000000..b808c37
Binary files /dev/null and b/data/toyaa.RData differ
diff --git a/data/toycodon.RData b/data/toycodon.RData
new file mode 100644
index 0000000..90cc976
Binary files /dev/null and b/data/toycodon.RData differ
diff --git a/data/waterabs.RData b/data/waterabs.RData
new file mode 100644
index 0000000..06f2f97
Binary files /dev/null and b/data/waterabs.RData differ
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index d0b3cf4..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,13 +0,0 @@
-r-cran-seqinr (3.3-0-1) unstable; urgency=medium
-
- * New upstream version
- * New (Build-)Depends: r-cran-segmented
- * cme fix dpkg-control
-
- -- Andreas Tille <tille at debian.org> Mon, 08 Aug 2016 08:51:41 +0200
-
-r-cran-seqinr (3.1-3-1) unstable; urgency=medium
-
- * Initial upload (Closes: #790717)
-
- -- Andreas Tille <tille at debian.org> Wed, 01 Jul 2015 08:17:22 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 7248b05..0000000
--- a/debian/control
+++ /dev/null
@@ -1,26 +0,0 @@
-Source: r-cran-seqinr
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Priority: optional
-Build-Depends: debhelper (>= 9),
- cdbs,
- r-base-dev,
- r-cran-ade4,
- r-cran-segmented
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-cran-seqinr/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-cran-seqinr/
-Homepage: http://cran.r-project.org/web/packages/seqinr
-
-Package: r-cran-seqinr
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends},
- ${R:Depends},
- r-cran-ade4,
- r-cran-segmented
-Description: GNU R biological sequences retrieval and analysis
- Exploratory data analysis and data visualization for biological sequence
- (DNA and protein) data. Includes also utilities for sequence data
- management under the ACNUC system.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 329a95b..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,27 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Contact: Simon Penel <simon.penel at univ-lyon1.fr>
-Upstream-Name: seqinr
-Source: http://cran.r-project.org/web/packages/seqinr/
-
-Files: *
-Copyright: 2007-2014 Delphine Charif, Jean R. Lobry, Anamaria Necsulea,
- Leonor Palmeira, Simon Penel, Guy Perriere
-License: GPL-2+
-
-Files: debian/*
-Copyright: 2015 Andreas Tille <tille at debian.org>
-License: GPL-2+
-
-License: GPL-2+
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- On Debian systems, the complete text of the GNU General Public License
- version 2 can be found in ‘/usr/share/common-licenses/GPL-2’.
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index a65e05f..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/make -f
-
-include /usr/share/R/debian/r-cran.mk
-
-install/$(package)::
- chmod 644 debian/$(package)/usr/lib/R/site-library/$(cranName)/sequences/*
- chmod 644 debian/$(package)/usr/lib/R/site-library/$(cranName)/abif/*
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index de4ab13..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,9 +0,0 @@
-Reference:
- Author: Delphine Charif and Jean R. Lobry
- Title: "SeqinR 1.0-2: a contributed package to the R project for statistical computing devoted to biological sequences retrieval and analysis"
- Booktitle: "Structural approaches to sequence evolution: Molecules, networks, populations"
- Editor: Ugo Bastolla and Markus Porto and H. Eduardo Roman and Michele Vendruscolo
- ISBN: 978-3-540-35305-8
- Year: 2007
- Pages: 207-232
- URL: http://link.springer.com/book/10.1007%2F978-3-540-35306-5
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index c2e660b..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=3
-http://cran.r-project.org/src/contrib/seqinr_([\d.-]*)\.tar.gz
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..b86e73f
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,15 @@
+citHeader("To cite seqinr in publications use:")
+
+citEntry(entry = "incollection",
+ author = "D. Charif, J.R. Lobry",
+ title = "Seqin{R} 1.0-2: a contributed package to the {R} project for statistical computing devoted to biological sequences retrieval and analysis.",
+ booktitle = "Structural approaches to sequence evolution: Molecules, networks, populations",
+ year = "2007",
+ editor = "U. Bastolla, M. Porto, H.E. Roman and M. Vendruscolo",
+ series = "Biological and Medical Physics, Biomedical Engineering",
+ pages = "207-232",
+ address = "New York",
+ publisher = "Springer Verlag",
+ note = "{ISBN :} 978-3-540-35305-8",
+
+ textVersion = "Charif, D. and Lobry, J.R. (2007)")
diff --git a/inst/abif/1_0000206138_C01_005.fsa b/inst/abif/1_0000206138_C01_005.fsa
new file mode 100755
index 0000000..b076382
Binary files /dev/null and b/inst/abif/1_0000206138_C01_005.fsa differ
diff --git a/inst/abif/1_FAC321_0000205983_B02_004.fsa b/inst/abif/1_FAC321_0000205983_B02_004.fsa
new file mode 100755
index 0000000..d900a2f
Binary files /dev/null and b/inst/abif/1_FAC321_0000205983_B02_004.fsa differ
diff --git a/inst/abif/2_0000206138_C01_005.fsa b/inst/abif/2_0000206138_C01_005.fsa
new file mode 100755
index 0000000..2974d39
Binary files /dev/null and b/inst/abif/2_0000206138_C01_005.fsa differ
diff --git a/inst/abif/2_FAC321_0000205983_B02_004.fsa b/inst/abif/2_FAC321_0000205983_B02_004.fsa
new file mode 100755
index 0000000..685ba62
Binary files /dev/null and b/inst/abif/2_FAC321_0000205983_B02_004.fsa differ
diff --git a/inst/abif/AmpFLSTR_Bins_v1.txt b/inst/abif/AmpFLSTR_Bins_v1.txt
new file mode 100644
index 0000000..9284785
--- /dev/null
+++ b/inst/abif/AmpFLSTR_Bins_v1.txt
@@ -0,0 +1 @@
+Version GM v 3.0
Chemistry Kit AmpFLSTR_Panels_v1
BinSet Name AmpFLSTR_Bins_v1
Panel Name Blue_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
Marker Name FGA
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
Panel Name Green_I_v1
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name TH01
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
Marker Name TPOX
5 214.0 0.5 0.5
6 218.0 0.5 0.5
7 222.0 0.5 0.5
8 226.0 0.5 0.5
9 230.0 0.5 0.5
10 234.0 0.5 0.5
11 238.0 0.5 0.5
12 242.0 0.5 0.5
13 246.0 0.5 0.5
14 250.0 0.5 0.5
Marker Name CSF1PO
6 281.0 0.5 0.5
7 285.0 0.5 0.5
8 289.0 0.5 0.5
9 293.0 0.5 0.5
10 297.0 0.5 0.5
10.2 299.0 0.5 0.5
11 301.0 0.5 0.5
12 305.0 0.5 0.5
13 309.0 0.5 0.5
14 313.0 0.5 0.5
15 317.0 0.5 0.5
16 321.0 0.5 0.5
Panel Name Profiler_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
Marker Name FGA
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name TH01
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
Marker Name TPOX
5 214.0 0.5 0.5
6 218.0 0.5 0.5
7 222.0 0.5 0.5
8 226.0 0.5 0.5
9 230.0 0.5 0.5
10 234.0 0.5 0.5
11 238.0 0.5 0.5
12 242.0 0.5 0.5
13 246.0 0.5 0.5
14 250.0 0.5 0.5
Marker Name CSF1PO
6 281.0 0.5 0.5
7 285.0 0.5 0.5
8 289.0 0.5 0.5
9 293.0 0.5 0.5
10 297.0 0.5 0.5
10.2 299.0 0.5 0.5
11 301.0 0.5 0.5
12 305.0 0.5 0.5
13 309.0 0.5 0.5
14 313.0 0.5 0.5
15 317.0 0.5 0.5
16 321.0 0.5 0.5
Marker Name D5S818
6 131.0 0.5 0.5
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
17 175.0 0.5 0.5
Marker Name D13S317
7 202.0 0.5 0.5
8 206.0 0.5 0.5
9 210.0 0.5 0.5
10 214.0 0.5 0.5
11 218.0 0.5 0.5
12 222.0 0.5 0.5
13 226.0 0.5 0.5
14 230.0 0.5 0.5
15 234.0 0.5 0.5
16 238.0 0.5 0.5
Marker Name D7S820
5 254.0 0.5 0.5
5.2 256.0 0.5 0.5
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
16 298.0 0.5 0.5
Panel Name Profiler_Plus_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
Marker Name FGA
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D8S1179
7 124.0 0.5 0.5
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
20 176.0 0.5 0.5
Marker Name D21S11
23.2 185.0 0.5 0.5
24 187.0 0.5 0.5
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
38.2 245.0 0.5 0.5
39 247.0 0.5 0.5
Marker Name D18S51
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
27 345.0 0.5 0.5
Marker Name D5S818
6 131.0 0.5 0.5
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
17 175.0 0.5 0.5
Marker Name D13S317
7 202.0 0.5 0.5
8 206.0 0.5 0.5
9 210.0 0.5 0.5
10 214.0 0.5 0.5
11 218.0 0.5 0.5
12 222.0 0.5 0.5
13 226.0 0.5 0.5
14 230.0 0.5 0.5
15 234.0 0.5 0.5
16 238.0 0.5 0.5
Marker Name D7S820
5 254.0 0.5 0.5
5.2 256.0 0.5 0.5
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
16 298.0 0.5 0.5
Panel Name COfiler_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name D16S539
5 234.0 0.5 0.5
6 238.0 0.5 0.5
7 242.0 0.5 0.5
8 246.0 0.5 0.5
9 250.0 0.5 0.5
10 254.0 0.5 0.5
11 258.0 0.5 0.5
12 262.0 0.5 0.5
12.2 264.0 0.5 0.5
13 266.0 0.5 0.5
14 270.0 0.5 0.5
15 274.0 0.5 0.5
16 278.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name TH01
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
Marker Name TPOX
5 214.0 0.5 0.5
6 218.0 0.5 0.5
7 222.0 0.5 0.5
8 226.0 0.5 0.5
9 230.0 0.5 0.5
10 234.0 0.5 0.5
11 238.0 0.5 0.5
12 242.0 0.5 0.5
13 246.0 0.5 0.5
14 250.0 0.5 0.5
Marker Name CSF1PO
6 281.0 0.5 0.5
7 285.0 0.5 0.5
8 289.0 0.5 0.5
9 293.0 0.5 0.5
10 297.0 0.5 0.5
10.2 299.0 0.5 0.5
11 301.0 0.5 0.5
12 305.0 0.5 0.5
13 309.0 0.5 0.5
14 313.0 0.5 0.5
15 317.0 0.5 0.5
16 321.0 0.5 0.5
Marker Name D7S820
5 254.0 0.5 0.5
5.2 256.0 0.5 0.5
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
16 298.0 0.5 0.5
Panel Name SGM_Plus_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
23 205.0 0.5 0.5
24 209.0 0.5 0.5
25 213.0 0.5 0.5
Marker Name D16S539
5 234.0 0.5 0.5
6 238.0 0.5 0.5
7 242.0 0.5 0.5
8 246.0 0.5 0.5
9 250.0 0.5 0.5
10 254.0 0.5 0.5
11 258.0 0.5 0.5
12 262.0 0.5 0.5
12.2 264.0 0.5 0.5
13 266.0 0.5 0.5
14 270.0 0.5 0.5
15 274.0 0.5 0.5
16 278.0 0.5 0.5
Marker Name D2S1338
14 285.0 0.5 0.5
15 289.0 0.5 0.5
16 293.0 0.5 0.5
17 297.0 0.5 0.5
18 301.0 0.5 0.5
19 305.0 0.5 0.5
20 309.0 0.5 0.5
21 313.0 0.5 0.5
22 317.0 0.5 0.5
23 321.0 0.5 0.5
24 325.0 0.5 0.5
25 329.0 0.5 0.5
26 333.0 0.5 0.5
27 337.0 0.5 0.5
28 341.0 0.5 0.5
29 345.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D8S1179
7 124.0 0.5 0.5
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
20 176.0 0.5 0.5
Marker Name D21S11
23.2 185.0 0.5 0.5
24 187.0 0.5 0.5
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
38.2 245.0 0.5 0.5
39 247.0 0.5 0.5
Marker Name D18S51
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
27 345.0 0.5 0.5
Marker Name D19S433
9 106.0 0.5 0.5
9.2 108.0 0.5 0.5
10 110.0 0.5 0.5
10.2 112.0 0.5 0.5
11 114.0 0.5 0.5
11.2 116.0 0.5 0.5
12 118.0 0.5 0.5
12.2 120.0 0.5 0.5
13 122.0 0.5 0.5
13.2 124.0 0.5 0.5
14 126.0 0.5 0.5
14.2 128.0 0.5 0.5
15 130.0 0.5 0.5
15.2 132.0 0.5 0.5
16 134.0 0.5 0.5
16.2 136.0 0.5 0.5
17 138.0 0.5 0.5
17.2 140.0 0.5 0.5
18 142.0 0.5 0.5
18.2 144.0 0.5 0.5
Marker Name TH01
3 161.0 0.5 0.5
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
12 197.0 0.5 0.5
13 201.0 0.5 0.5
13.3 204.0 0.5 0.5
Marker Name FGA
16 211.0 0.5 0.5
16.2 213.0 0.5 0.5
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
31.2 273.0 0.5 0.5
32 275.0 0.5 0.5
32.2 277.0 0.5 0.5
33.2 281.0 0.5 0.5
34.2 285.0 0.5 0.5
42.2 317.0 0.5 0.5
43.2 321.0 0.5 0.5
44.2 325.0 0.5 0.5
45.2 329.0 0.5 0.5
46.2 333.0 0.5 0.5
47.2 337.0 0.5 0.5
48.2 341.0 0.5 0.5
49.2 345.0 0.5 0.5
50.2 349.0 0.5 0.5
51.2 353.0 0.5 0.5
Panel Name Identifiler_v1
Marker Name D8S1179
7 124.0 0.5 0.5
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
20 176.0 0.5 0.5
Marker Name D21S11
23.2 185.0 0.5 0.5
24 187.0 0.5 0.5
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
38.2 245.0 0.5 0.5
39 247.0 0.5 0.5
Marker Name D7S820
5 254.0 0.5 0.5
5.2 256.0 0.5 0.5
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
16 298.0 0.5 0.5
Marker Name CSF1PO
5 302.63 0.5 0.5
6 306.63 0.5 0.5
7 310.63 0.5 0.5
8 314.63 0.5 0.5
9 318.63 0.5 0.5
10 322.63 0.5 0.5
10.2 324.63 0.5 0.5
11 326.63 0.5 0.5
12 330.63 0.5 0.5
13 334.63 0.5 0.5
14 338.63 0.5 0.5
15 342.63 0.5 0.5
16 346.63 0.5 0.5
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name TH01
3 161.0 0.5 0.5
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
12 197.0 0.5 0.5
13 201.0 0.5 0.5
13.3 204.0 0.5 0.5
Marker Name D13S317
7 213.65 0.5 0.5
8 217.65 0.5 0.5
9 221.65 0.5 0.5
10 225.65 0.5 0.5
11 229.65 0.5 0.5
12 233.65 0.5 0.5
13 237.65 0.5 0.5
14 241.65 0.5 0.5
15 245.65 0.5 0.5
16 249.65 0.5 0.5
Marker Name D16S539
5 257.3 0.5 0.5
6 261.3 0.5 0.5
7 265.3 0.5 0.5
8 269.3 0.5 0.5
9 273.3 0.5 0.5
10 277.3 0.5 0.5
11 281.3 0.5 0.5
12 285.3 0.5 0.5
12.2 287.3 0.5 0.5
13 289.3 0.5 0.5
14 293.3 0.5 0.5
15 297.3 0.5 0.5
16 301.3 0.5 0.5
Marker Name D2S1338
14 305.31 0.5 0.5
15 309.31 0.5 0.5
16 313.31 0.5 0.5
17 317.31 0.5 0.5
18 319.31 0.5 0.5
19 325.31 0.5 0.5
20 329.31 0.5 0.5
21 333.31 0.5 0.5
22 337.31 0.5 0.5
23 341.31 0.5 0.5
24 345.31 0.5 0.5
25 349.31 0.5 0.5
26 353.31 0.5 0.5
27 357.31 0.5 0.5
28 361.31 0.5 0.5
29 365.31 0.5 0.5
Marker Name D19S433
9 106.0 0.5 0.5
9.2 108.0 0.5 0.5
10 110.0 0.5 0.5
10.2 112.0 0.5 0.5
11 114.0 0.5 0.5
11.2 116.0 0.5 0.5
12 118.0 0.5 0.5
12.2 120.0 0.5 0.5
13 122.0 0.5 0.5
13.2 124.0 0.5 0.5
14 126.0 0.5 0.5
14.2 128.0 0.5 0.5
15 130.0 0.5 0.5
15.2 132.0 0.5 0.5
16 134.0 0.5 0.5
16.2 136.0 0.5 0.5
17 138.0 0.5 0.5
17.2 140.0 0.5 0.5
18 142.0 0.5 0.5
18.2 144.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
23 205.0 0.5 0.5
24 209.0 0.5 0.5
25 213.0 0.5 0.5
Marker Name TPOX
5 220.99 0.5 0.5
6 224.99 0.5 0.5
7 228.99 0.5 0.5
8 232.99 0.5 0.5
9 236.99 0.5 0.5
10 240.99 0.5 0.5
11 244.99 0.5 0.5
12 248.99 0.5 0.5
13 252.99 0.5 0.5
14 256.99 0.5 0.5
Marker Name D18S51
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
27 345.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D5S818
6 131.0 0.5 0.5
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
17 175.0 0.5 0.5
Marker Name FGA
16 211.0 0.5 0.5
16.2 213.0 0.5 0.5
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
31.2 273.0 0.5 0.5
32 275.0 0.5 0.5
32.2 277.0 0.5 0.5
33.2 281.0 0.5 0.5
34.2 285.0 0.5 0.5
42.2 317.0 0.5 0.5
43.2 321.0 0.5 0.5
44.2 325.0 0.5 0.5
45.2 329.0 0.5 0.5
46.2 333.0 0.5 0.5
47.2 337.0 0.5 0.5
48.2 341.0 0.5 0.5
49.2 345.0 0.5 0.5
50.2 349.0 0.5 0.5
51.2 353.0 0.5 0.5
Panel Name SEfiler_v1
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name vWA
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
22 201.0 0.5 0.5
23 205.0 0.5 0.5
24 209.0 0.5 0.5
25 213.0 0.5 0.5
Marker Name D16S539
5 234.0 0.5 0.5
6 238.0 0.5 0.5
7 242.0 0.5 0.5
8 246.0 0.5 0.5
9 250.0 0.5 0.5
10 254.0 0.5 0.5
11 258.0 0.5 0.5
12 262.0 0.5 0.5
12.2 264.0 0.5 0.5
13 266.0 0.5 0.5
14 270.0 0.5 0.5
15 274.0 0.5 0.5
16 278.0 0.5 0.5
Marker Name D2S1338
14 285.0 0.5 0.5
15 289.0 0.5 0.5
16 293.0 0.5 0.5
17 297.0 0.5 0.5
18 301.0 0.5 0.5
19 305.0 0.5 0.5
20 309.0 0.5 0.5
21 313.0 0.5 0.5
22 317.0 0.5 0.5
23 321.0 0.5 0.5
24 325.0 0.5 0.5
25 329.0 0.5 0.5
26 333.0 0.5 0.5
27 337.0 0.5 0.5
28 341.0 0.5 0.5
29 345.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D8S1179
7 124.0 0.5 0.5
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
20 176.0 0.5 0.5
Marker Name SE33
4.2 203.0 0.5 0.5
5 205.0 0.5 0.5
6 209.0 0.5 0.5
6.3 212.0 0.5 0.4
7 213.0 0.4 0.5
8 217.0 0.5 0.5
8.2 219.0 0.5 0.5
9 221.0 0.5 0.5
9.2 223.0 0.5 0.5
10 225.0 0.5 0.5
10.2 227.0 0.5 0.5
11 229.0 0.5 0.5
11.2 231.0 0.5 0.5
12 233.0 0.5 0.5
12.2 235.0 0.5 0.5
13 237.0 0.5 0.5
13.2 239.0 0.5 0.5
14 241.0 0.5 0.5
14.2 243.0 0.5 0.5
15 245.0 0.5 0.5
15.2 247.0 0.5 0.5
16 249.0 0.5 0.5
16.2 251.0 0.5 0.5
17 253.0 0.5 0.5
17.2 255.0 0.5 0.5
18 257.0 0.5 0.5
18.2 259.0 0.5 0.5
19 261.0 0.5 0.5
19.2 263.0 0.5 0.5
20 265.0 0.5 0.5
20.2 267.0 0.5 0.5
21 269.0 0.5 0.4
21.1 270.0 0.4 0.4
21.2 271.0 0.4 0.5
22 273.0 0.5 0.5
22.2 275.0 0.5 0.5
23 277.0 0.5 0.5
23.2 279.0 0.5 0.5
24 281.0 0.5 0.5
24.2 283.0 0.5 0.5
25 285.0 0.5 0.5
25.2 287.0 0.5 0.5
26 289.0 0.5 0.5
26.2 291.0 0.5 0.5
27 293.0 0.5 0.5
27.2 295.0 0.5 0.5
28 297.0 0.5 0.5
28.2 299.0 0.5 0.5
29 301.0 0.5 0.5
29.2 303.0 0.5 0.5
30 305.0 0.5 0.5
30.2 307.0 0.5 0.5
31 309.0 0.5 0.5
31.2 311.0 0.5 0.5
32 313.0 0.5 0.5
32.2 315.0 0.5 0.5
33 317.0 0.5 0.5
33.2 319.0 0.5 0.5
34 321.0 0.5 0.5
34.2 323.0 0.5 0.5
35 325.0 0.5 0.5
35.2 327.0 0.5 0.5
36 329.0 0.5 0.5
36.2 331.0 0.5 0.5
37 333.0 0.5 0.5
37.2 335.0 0.5 0.5
38 337.0 0.5 0.5
Marker Name D19S433
9 106.0 0.5 0.5
9.2 108.0 0.5 0.5
10 110.0 0.5 0.5
10.2 112.0 0.5 0.5
11 114.0 0.5 0.5
11.2 116.0 0.5 0.5
12 118.0 0.5 0.5
12.2 120.0 0.5 0.5
13 122.0 0.5 0.5
13.2 124.0 0.5 0.5
14 126.0 0.5 0.5
14.2 128.0 0.5 0.5
15 130.0 0.5 0.5
15.2 132.0 0.5 0.5
16 134.0 0.5 0.5
16.2 136.0 0.5 0.5
17 138.0 0.5 0.5
17.2 140.0 0.5 0.5
18 142.0 0.5 0.5
18.2 144.0 0.5 0.5
Marker Name TH01
3 161.0 0.5 0.5
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
12 197.0 0.5 0.5
13 201.0 0.5 0.5
13.3 204.0 0.5 0.5
Marker Name FGA
16 211.0 0.5 0.5
16.2 213.0 0.5 0.5
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
31.2 273.0 0.5 0.5
32 275.0 0.5 0.5
32.2 277.0 0.5 0.5
33.2 281.0 0.5 0.5
34.2 285.0 0.5 0.5
42.2 317.0 0.5 0.5
43.2 321.0 0.5 0.5
44.2 325.0 0.5 0.5
45.2 329.0 0.5 0.5
46.2 333.0 0.5 0.5
47.2 337.0 0.5 0.5
48.2 341.0 0.5 0.5
49.2 345.0 0.5 0.5
50.2 349.0 0.5 0.5
51.2 353.0 0.5 0.5
Marker Name D21S11
23.2 185.0 0.5 0.5
24 187.0 0.5 0.5
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
38.2 245.0 0.5 0.5
39 247.0 0.5 0.5
Marker Name D18S51
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
27 345.0 0.5 0.5
Panel Name Profiler_Plus_CODIS_v1
Marker Name D3S1358
<12 113.49 15.49 0.0
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
>19 142.51 0.0 5.49
Marker Name vWA
<11 156.49 5.49 0.0
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
>21 197.51 0.0 5.49
Marker Name FGA
<18 218.49 12.24 0.0
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
>30 267.51 0.0 92.49
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D8S1179
<8 127.49 9.49 0.0
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
>19 172.51 0.0 10.99
Marker Name D21S11
<24.2 188.49 3.99 0.0
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
>38 243.51 0.0 3.99
Marker Name D18S51
<9 272.49 8.0 0.0
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
>26 341.51 0.0 8.49
Marker Name D5S818
<7 134.49 6.49 0.0
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
>16 171.51 0.0 8.49
Marker Name D13S317
<8 205.49 13.49 0.0
8 206.0 0.5 0.5
9 210.0 0.5 0.5
10 214.0 0.5 0.5
11 218.0 0.5 0.5
12 222.0 0.5 0.5
13 226.0 0.5 0.5
14 230.0 0.5 0.5
15 234.0 0.5 0.5
>15 234.51 0.0 7.49
Marker Name D7S820
<6 257.49 6.49 0.0
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
>15 294.51 0.0 3.99
Panel Name COfiler_CODIS_v1
Marker Name D3S1358
<12 113.49 15.49 0.0
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
>19 142.51 0.0 5.49
Marker Name D16S539
<5 233.49 4.49 0.0
5 234.0 0.5 0.5
6 238.0 0.5 0.5
7 242.0 0.5 0.5
8 246.0 0.5 0.5
9 250.0 0.5 0.5
10 254.0 0.5 0.5
11 258.0 0.5 0.5
12 262.0 0.5 0.5
12.2 264.0 0.5 0.5
13 266.0 0.5 0.5
14 270.0 0.5 0.5
15 274.0 0.5 0.5
>15 274.51 0.0 4.49
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name TH01
<5 168.49 9.49 0.0
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
>10 189.51 0.0 15.49
Marker Name TPOX
<6 217.49 5.49 0.0
6 218.0 0.5 0.5
7 222.0 0.5 0.5
8 226.0 0.5 0.5
9 230.0 0.5 0.5
10 234.0 0.5 0.5
11 238.0 0.5 0.5
12 242.0 0.5 0.5
13 246.0 0.5 0.5
>13 246.51 0.0 7.49
Marker Name CSF1PO
<6 280.49 5.49 0.0
6 281.0 0.5 0.5
7 285.0 0.5 0.5
8 289.0 0.5 0.5
9 293.0 0.5 0.5
10 297.0 0.5 0.5
10.2 299.0 0.5 0.5
11 301.0 0.5 0.5
12 305.0 0.5 0.5
13 309.0 0.5 0.5
14 313.0 0.5 0.5
15 317.0 0.5 0.5
>15 317.51 0.0 5.49
Marker Name D7S820
<6 257.49 6.49 0.0
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
>15 294.51 0.0 3.99
Panel Name Identifiler_CODIS_v1
Marker Name D8S1179
<8 127.49 9.49 0.0
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
>19 172.51 0.0 10.99
Marker Name D21S11
<24.2 188.49 3.99 0.0
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
>38 243.51 0.0 3.99
Marker Name D7S820
<6 257.49 6.49 0.0
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
>15 294.51 0.0 3.99
Marker Name CSF1PO
<6 306.12 4.0 0.0
6 306.63 0.5 0.5
7 310.63 0.5 0.5
8 314.63 0.5 0.5
9 318.63 0.5 0.5
10 322.63 0.5 0.5
10.2 324.63 0.5 0.5
11 326.63 0.5 0.5
12 330.63 0.5 0.5
13 334.63 0.5 0.5
14 338.63 0.5 0.5
15 342.63 0.5 0.5
>15 343.14 0.0 5.49
Marker Name D3S1358
<12 113.49 15.49 0.0
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
>19 142.51 0.0 5.49
Marker Name TH01
<4 164.49 5.49 0.0
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
12 197.0 0.5 0.5
13 201.0 0.5 0.5
13.3 204.0 0.5 0.5
>13.3 204.51 0.0 0.49
Marker Name D13S317
<8 217.14 11.48 0.0
8 217.65 0.5 0.5
9 221.65 0.5 0.5
10 225.65 0.5 0.5
11 229.65 0.5 0.5
12 233.65 0.5 0.5
13 237.65 0.5 0.5
14 241.65 0.5 0.5
15 245.65 0.5 0.5
>15 246.16 0.0 4.0
Marker Name D16S539
<5 256.79 1.49 0.0
5 257.3 0.5 0.5
6 261.3 0.5 0.5
7 265.3 0.5 0.5
8 269.3 0.5 0.5
9 273.3 0.5 0.5
10 277.3 0.5 0.5
11 281.3 0.5 0.5
12 285.3 0.5 0.5
12.2 287.3 0.5 0.5
13 289.3 0.5 0.5
14 293.3 0.5 0.5
15 297.3 0.5 0.5
>15 297.81 0.0 4.0
Marker Name D2S1338
<15 308.8 4.0 0.0
15 309.31 0.5 0.5
16 313.31 0.5 0.5
17 317.31 0.5 0.5
18 319.31 0.5 0.5
19 325.31 0.5 0.5
20 329.31 0.5 0.5
21 333.31 0.5 0.5
22 337.31 0.5 0.5
23 341.31 0.5 0.5
24 345.31 0.5 0.5
25 349.31 0.5 0.5
26 353.31 0.5 0.5
27 357.31 0.5 0.5
28 361.31 0.5 0.5
>28 361.82 0.0 8.49
Marker Name D19S433
<9 105.49 4.48 0.0
9 106.0 0.5 0.5
9.2 108.0 0.5 0.5
10 110.0 0.5 0.5
10.2 112.0 0.5 0.5
11 114.0 0.5 0.5
11.2 116.0 0.5 0.5
12 118.0 0.5 0.5
12.2 120.0 0.5 0.5
13 122.0 0.5 0.5
13.2 124.0 0.5 0.5
14 126.0 0.5 0.5
14.2 128.0 0.5 0.5
15 130.0 0.5 0.5
15.2 132.0 0.5 0.5
16 134.0 0.5 0.5
16.2 136.0 0.5 0.5
17 138.0 0.5 0.5
17.2 140.0 0.5 0.5
>17.2 140.51 0.0 7.49
Marker Name vWA
<11 156.49 5.49 0.0
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
>21 197.51 0.0 15.99
Marker Name TPOX
<6 224.48 7.48 0.0
6 224.99 0.5 0.5
7 228.99 0.5 0.5
8 232.99 0.5 0.5
9 236.99 0.5 0.5
10 240.99 0.5 0.5
11 244.99 0.5 0.5
12 248.99 0.5 0.5
13 252.99 0.5 0.5
>13 253.5 0.0 7.49
Marker Name D18S51
<7 264.49 0.0 0.0
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
23 329.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
>26 341.51 0.0 8.49
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D5S818
<7 134.49 6.49 0.0
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
>16 171.51 0.0 8.49
Marker Name FGA
<18 218.49 12.24 0.0
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
>30 267.51 0.0 92.49
Panel Name Identifiler_v1-dup
Marker Name D8S1179
7 124.0 0.5 0.5
8 128.0 0.5 0.5
9 132.0 0.5 0.5
10 136.0 0.5 0.5
11 140.0 0.5 0.5
12 144.0 0.5 0.5
13 148.0 0.5 0.5
14 152.0 0.5 0.5
15 156.0 0.5 0.5
16 160.0 0.5 0.5
17 164.0 0.5 0.5
18 168.0 0.5 0.5
19 172.0 0.5 0.5
20 176.0 0.5 0.5
Marker Name D21S11
23.2 185.0 0.5 0.5
24 187.0 0.5 0.5
24.2 189.0 0.5 0.5
25 191.0 0.5 0.5
25.2 193.0 0.5 0.5
26 195.0 0.5 0.5
26.2 197.0 0.5 0.5
27 199.0 0.5 0.5
27.2 201.0 0.5 0.5
28 203.0 0.5 0.5
28.2 205.0 0.5 0.5
29 207.0 0.5 0.5
29.2 209.0 0.5 0.5
30 211.0 0.5 0.5
30.2 213.0 0.5 0.5
31 215.0 0.5 0.5
31.2 217.0 0.5 0.5
32 219.0 0.5 0.5
32.2 221.0 0.5 0.5
33 223.0 0.5 0.5
33.2 225.0 0.5 0.5
34 227.0 0.5 0.5
34.2 229.0 0.5 0.5
35 231.0 0.5 0.5
35.2 233.0 0.5 0.5
36 235.0 0.5 0.5
36.2 237.0 0.5 0.5
37 239.0 0.5 0.5
37.2 241.0 0.5 0.5
38 243.0 0.5 0.5
38.2 245.0 0.5 0.5
39 247.0 0.5 0.5
Marker Name D7S820
5 254.0 0.5 0.5
5.2 256.0 0.5 0.5
6 258.0 0.5 0.5
7 262.0 0.5 0.5
8 266.0 0.5 0.5
8.2 268.0 0.5 0.5
9 270.0 0.5 0.5
9.2 272.0 0.5 0.5
10 274.0 0.5 0.5
11 278.0 0.5 0.5
12 282.0 0.5 0.5
13 286.0 0.5 0.5
14 290.0 0.5 0.5
15 294.0 0.5 0.5
16 298.0 0.5 0.5
Marker Name CSF1PO
15 342.63 0.5 0.5
16 346.63 0.5 0.5
5 302.63 0.5 0.5
6 306.63 0.5 0.5
7 310.63 0.5 0.5
8 314.63 0.5 0.5
9 318.63 0.5 0.5
10 322.63 0.5 0.5
10.2 324.63 0.5 0.5
11 326.63 0.5 0.5
12 330.63 0.5 0.5
13 334.63 0.5 0.5
14 338.63 0.5 0.5
Marker Name D3S1358
11 110.0 0.5 0.5
12 114.0 0.5 0.5
13 118.0 0.5 0.5
14 122.0 0.5 0.5
15 126.0 0.5 0.5
15.2 128.0 0.5 0.5
16 130.0 0.5 0.5
16.2 132.0 0.5 0.5
17 134.0 0.5 0.5
17.2 136.0 0.5 0.5
18 138.0 0.5 0.5
18.2 140.0 0.5 0.5
19 142.0 0.5 0.5
20 146.0 0.5 0.5
Marker Name TH01
3 161.0 0.5 0.5
4 165.0 0.5 0.5
5 169.0 0.5 0.5
5.3 172.0 0.5 0.4
6 173.0 0.4 0.5
6.3 176.0 0.5 0.4
7 177.0 0.4 0.5
7.3 180.0 0.5 0.4
8 181.0 0.4 0.5
8.3 184.0 0.5 0.4
9 185.0 0.4 0.5
9.3 188.0 0.5 0.4
10 189.0 0.4 0.5
10.3 192.0 0.5 0.4
11 193.0 0.4 0.5
12 197.0 0.5 0.5
13 201.0 0.5 0.5
13.3 204.0 0.5 0.5
Marker Name D13S317
7 213.65 0.5 0.5
8 217.65 0.5 0.5
9 221.65 0.5 0.5
10 225.65 0.5 0.5
11 229.65 0.5 0.5
12 233.65 0.5 0.5
13 237.65 0.5 0.5
14 241.65 0.5 0.5
15 245.65 0.5 0.5
16 249.65 0.5 0.5
Marker Name D16S539
5 257.3 0.5 0.5
6 261.3 0.5 0.5
7 265.3 0.5 0.5
8 269.3 0.5 0.5
9 273.3 0.5 0.5
10 277.3 0.5 0.5
11 281.3 0.5 0.5
12 285.3 0.5 0.5
12.2 287.3 0.5 0.5
13 289.3 0.5 0.5
14 293.3 0.5 0.5
15 297.3 0.5 0.5
16 301.3 0.5 0.5
Marker Name D2S1338
14 305.31 0.5 0.5
15 309.31 0.5 0.5
16 313.31 0.5 0.5
17 317.31 0.5 0.5
18 319.31 0.5 0.5
19 325.31 0.5 0.5
20 329.31 0.5 0.5
21 333.31 0.5 0.5
22 337.31 0.5 0.5
23 341.31 0.5 0.5
24 345.31 0.5 0.5
25 349.31 0.5 0.5
26 353.31 0.5 0.5
27 357.31 0.5 0.5
28 361.31 0.5 0.5
29 365.31 0.5 0.5
Marker Name D19S433
9 106.0 0.5 0.5
9.2 108.0 0.5 0.5
10 110.0 0.5 0.5
10.2 112.0 0.5 0.5
11 114.0 0.5 0.5
11.2 116.0 0.5 0.5
12 118.0 0.5 0.5
12.2 120.0 0.5 0.5
13 122.0 0.5 0.5
13.2 124.0 0.5 0.5
14 126.0 0.5 0.5
14.2 128.0 0.5 0.5
15 130.0 0.5 0.5
15.2 132.0 0.5 0.5
16 134.0 0.5 0.5
16.2 136.0 0.5 0.5
17 138.0 0.5 0.5
17.2 140.0 0.5 0.5
18 142.0 0.5 0.5
18.2 144.0 0.5 0.5
Marker Name vWA
22 201.0 0.5 0.5
23 205.0 0.5 0.5
24 209.0 0.5 0.5
25 213.0 0.5 0.5
10 153.0 0.5 0.5
11 157.0 0.5 0.5
12 161.0 0.5 0.5
13 165.0 0.5 0.5
14 169.0 0.5 0.5
15 173.0 0.5 0.5
15.2 175.0 0.5 0.5
16 177.0 0.5 0.5
17 181.0 0.5 0.5
18 185.0 0.5 0.5
18.2 187.0 0.5 0.5
19 189.0 0.5 0.5
20 193.0 0.5 0.5
21 197.0 0.5 0.5
Marker Name TPOX
5 220.99 0.5 0.5
6 224.99 0.5 0.5
7 228.99 0.5 0.5
8 232.99 0.5 0.5
9 236.99 0.5 0.5
10 240.99 0.5 0.5
11 244.99 0.5 0.5
12 248.99 0.5 0.5
13 252.99 0.5 0.5
14 256.99 0.5 0.5
Marker Name D18S51
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
9.2 275.0 0.5 0.5
10 277.0 0.5 0.5
10.2 279.0 0.5 0.5
11 281.0 0.5 0.5
11.2 283.0 0.5 0.5
12 285.0 0.5 0.5
12.2 287.0 0.5 0.5
13 289.0 0.5 0.5
13.2 291.0 0.5 0.5
14 293.0 0.5 0.5
14.2 295.0 0.5 0.5
15 297.0 0.5 0.5
15.2 299.0 0.5 0.5
16 301.0 0.5 0.5
16.2 303.0 0.5 0.5
17 305.0 0.5 0.5
17.2 307.0 0.5 0.5
18 309.0 0.5 0.5
18.2 311.0 0.5 0.5
19 313.0 0.5 0.5
19.2 315.0 0.5 0.5
20 317.0 0.5 0.5
20.2 319.0 0.5 0.5
21 321.0 0.5 0.5
21.2 323.0 0.5 0.5
22 325.0 0.5 0.5
22.2 327.0 0.5 0.5
23 329.0 0.5 0.5
23.2 331.0 0.5 0.5
24 333.0 0.5 0.5
25 337.0 0.5 0.5
26 341.0 0.5 0.5
27 345.0 0.5 0.5
Marker Name AMEL
X 107.0 0.5 0.5
Y 113.0 0.5 0.5
Marker Name D5S818
6 131.0 0.5 0.5
7 135.0 0.5 0.5
8 139.0 0.5 0.5
9 143.0 0.5 0.5
10 147.0 0.5 0.5
11 151.0 0.5 0.5
12 155.0 0.5 0.5
13 159.0 0.5 0.5
14 163.0 0.5 0.5
15 167.0 0.5 0.5
16 171.0 0.5 0.5
17 175.0 0.5 0.5
Marker Name FGA
49.2 345.0 0.5 0.5
50.2 349.0 0.5 0.5
51.2 353.0 0.5 0.5
16 211.0 0.5 0.5
16.2 213.0 0.5 0.5
17 215.0 0.5 0.5
17.2 217.0 0.5 0.5
18 219.0 0.5 0.5
18.2 221.0 0.5 0.5
19 223.0 0.5 0.5
19.2 225.0 0.5 0.5
20 227.0 0.5 0.5
20.2 229.0 0.5 0.5
21 231.0 0.5 0.5
21.2 233.0 0.5 0.5
22 235.0 0.5 0.5
22.2 237.0 0.5 0.5
23 239.0 0.5 0.5
23.2 241.0 0.5 0.5
24 243.0 0.5 0.5
24.2 245.0 0.5 0.5
25 247.0 0.5 0.5
25.2 249.0 0.5 0.5
26 251.0 0.5 0.5
26.2 253.0 0.5 0.5
27 255.0 0.5 0.5
27.2 257.0 0.5 0.5
28 259.0 0.5 0.5
28.2 261.0 0.5 0.5
29 263.0 0.5 0.5
29.2 265.0 0.5 0.5
30 267.0 0.5 0.5
30.2 269.0 0.5 0.5
31 271.0 0.5 0.5
31.2 273.0 0.5 0.5
32 275.0 0.5 0.5
32.2 277.0 0.5 0.5
33.2 281.0 0.5 0.5
34.2 285.0 0.5 0.5
42.2 317.0 0.5 0.5
43.2 321.0 0.5 0.5
44.2 325.0 0.5 0.5
45.2 329.0 0.5 0.5
46.2 333.0 0.5 0.5
47.2 337.0 0.5 0.5
48.2 341.0 0.5 0.5
\ No newline at end of file
diff --git a/inst/abif/AmpFLSTR_Panels_v1.txt b/inst/abif/AmpFLSTR_Panels_v1.txt
new file mode 100644
index 0000000..398940a
--- /dev/null
+++ b/inst/abif/AmpFLSTR_Panels_v1.txt
@@ -0,0 +1 @@
+#GeneMapper ID v3.1 Last edited 071403
#GMv3.0 for import into GeneMapper ID v3.1
Version GMv3.0
Kit type Microsatellite
Chemistry Kit AmpFLSTR_Panels_v1
Panel Blue_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
Panel Green_I_v1
AMEL green 106.00 114.00 x 9 0.060 none X, Y,
TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel Profiler_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
AMEL green 106.00 114.00 x 9 0.000 none X, Y,
TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
D5S818 yellow 128.00 180.00 11 4 0.100 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
D13S317 yellow 192.00 242.00 11 4 0.100 none 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel Profiler_Plus_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
AMEL green 106.00 114.00 x 9 0.000 none X, Y,
D8S1179 green 118.00 183.50 13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
D21S11 green 184.50 247.50 30 4 0.130 none 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 38,
D18S51 green 264.49 350.00 15,19 4 0.160 none 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
D5S818 yellow 128.00 180.00 11 4 0.100 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
D13S317 yellow 192.00 242.00 11 4 0.100 none 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel COfiler_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
D16S539 blue 229.00 279.00 11,12 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
AMEL green 106.00 114.00 x 9 0.000 none X, Y,
TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel SGM_Plus_v1
D3S1358 blue 98.00 148.00 15,16 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 213.50 14,16 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
D16S539 blue 229.00 279.00 9,10 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
D2S1338 blue 284.00 354.00 20,23 4 0.150 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
AMEL green 106.00 114.00 x,y 9 0.000 none X, Y,
D8S1179 green 118.00 183.50 12,13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
D21S11 green 184.50 247.50 28,31 4 0.130 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D18S51 green 264.49 350.00 12,15 4 0.160 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
D19S433 yellow 101.00 148.00 14,15 4 0.170 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
TH01 yellow 159.00 205.00 7,9.3 4 0.060 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
FGA yellow 206.25 360.00 24,26 4 0.110 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
Panel Identifiler_v1
D8S1179 blue 118.00 183.50 13 4 0.082 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
D21S11 blue 184.50 247.50 30 4 0.094 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D7S820 blue 251.00 298.50 10,11 4 0.082 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
CSF1PO blue 302.12 348.63 10,12 4 0.092 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
D3S1358 green 98.00 148.00 14,15 4 0.107 none 12, 13, 14, 15, 16, 17, 18, 19,
TH01 green 159.00 205.00 8,9.3 4 0.051 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
D13S317 green 205.65 250.16 11 4 0.080 none 8, 9, 10, 11, 12, 13, 14, 15,
D16S539 green 255.30 301.81 11,12 4 0.104 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
D2S1338 green 304.80 370.31 19,23 4 0.111 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
vWA yellow 151.00 213.50 17,18 4 0.126 none 11,12,13,14,15,16,17,18,19,20,21,22,23,24,
TPOX yellow 216.99 260.99 8 4 0.048 none 6, 7, 8, 9, 10, 11, 12, 13,
D18S51 yellow 264.49 350.00 15,19 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
AMEL red 106.00 114.00 x 9 0.000 none X, Y,
D5S818 red 128.00 180.00 11 4 0.068 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
FGA red 206.25 360.00 23,24 4 0.147 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
Panel SEfiler_v1
D3S1358 blue 98.00 148.00 15,16 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 213.50 14,16 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
D16S539 blue 229.00 279.00 9,10 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
D2S1338 blue 284.00 350.00 20,23 4 0.150 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
AMEL green 106.00 114.00 x,Y 9 0.000 none X, Y"
D8S1179 green 118.00 183.50 12,13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
SE33 green 190.00 350.00 17,25.2 4 0.120 none 4.2, 6.3, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20.2, 21, 21.1, 21.2, 22.2, 23.2, 24.2, 25.2, 26.2, 27.2, 28.2, 29.2, 30.2, 31.2, 32.2, 33.2, 34.2, 35, 35.2, 36, 37,
D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
TH01 yellow 159.00 205.00 7,9.3 4 0.060 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
FGA yellow 206.25 360.00 24,26 4 0.110 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
D21S11 red 184.50 247.50 28,31 4 0.130 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D18S51 red 264.49 350.00 12,15 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
Panel Profiler_Plus_CODIS_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
AMEL green 106.00 114.00 x 9 0.000 none X, Y,
D8S1179 green 118.00 183.50 13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
D21S11 green 184.50 247.50 30 4 0.130 none 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 38,
D18S51 green 264.49 350.00 15,19 4 0.160 none 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
D5S818 yellow 128.00 180.00 11 4 0.010 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
D13S317 yellow 192.00 242.00 11 4 0.010 none 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel COfiler_CODIS_v1
D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
D16S539 blue 229.00 279.00 11,12 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
AMEL green 106.00 114.00 x 9 0.000 none X, Y,
TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel Identifiler_CODIS_v1
D8S1179 blue 118.00 183.50 13 4 0.082 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
D21S11 blue 184.50 247.50 30 4 0.094 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D7S820 blue 251.00 298.50 10,11 4 0.082 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
CSF1PO blue 302.12 348.63 10,12 4 0.092 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
D3S1358 green 98.00 148.00 14,15 4 0.107 none 12, 13, 14, 15, 16, 17, 18, 19,
TH01 green 159.00 205.00 8,9.3 4 0.051 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
D13S317 green 205.65 250.16 11 4 0.080 none 8, 9, 10, 11, 12, 13, 14, 15,
D16S539 green 255.30 301.81 11,12 4 0.104 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
D2S1338 green 304.80 370.31 19,23 4 0.111 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
vWA yellow 151.00 213.50 17,18 4 0.126 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
TPOX yellow 216.99 260.99 8 4 0.048 none 6, 7, 8, 9, 10, 11, 12, 13,
D18S51 yellow 264.49 350.00 15,19 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
AMEL red 106.00 114.00 x 9 0.000 none X, Y,
D5S818 red 128.00 180.00 11 4 0.068 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
FGA red 206.25 360.00 23,24 4 0.147 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
\ No newline at end of file
diff --git a/inst/abif/NGM_Bins.txt b/inst/abif/NGM_Bins.txt
new file mode 100644
index 0000000..0ea15ba
--- /dev/null
+++ b/inst/abif/NGM_Bins.txt
@@ -0,0 +1,3689 @@
+Version GM v 3.0
+Chemistry Kit AmpFLSTR_Panels_v1 Promega_Panels_v1 NextGen
+BinSet Name AmpFLSTR_Bins_v1
+Panel Name Blue_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+Marker Name FGA
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+Panel Name Green_I_v1
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name TH01
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+Marker Name TPOX
+5 214.0 0.5 0.5
+6 218.0 0.5 0.5
+7 222.0 0.5 0.5
+8 226.0 0.5 0.5
+9 230.0 0.5 0.5
+10 234.0 0.5 0.5
+11 238.0 0.5 0.5
+12 242.0 0.5 0.5
+13 246.0 0.5 0.5
+14 250.0 0.5 0.5
+Marker Name CSF1PO
+6 281.0 0.5 0.5
+7 285.0 0.5 0.5
+8 289.0 0.5 0.5
+9 293.0 0.5 0.5
+10 297.0 0.5 0.5
+10.2 299.0 0.5 0.5
+11 301.0 0.5 0.5
+12 305.0 0.5 0.5
+13 309.0 0.5 0.5
+14 313.0 0.5 0.5
+15 317.0 0.5 0.5
+16 321.0 0.5 0.5
+Panel Name Profiler_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+Marker Name FGA
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name TH01
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+Marker Name TPOX
+5 214.0 0.5 0.5
+6 218.0 0.5 0.5
+7 222.0 0.5 0.5
+8 226.0 0.5 0.5
+9 230.0 0.5 0.5
+10 234.0 0.5 0.5
+11 238.0 0.5 0.5
+12 242.0 0.5 0.5
+13 246.0 0.5 0.5
+14 250.0 0.5 0.5
+Marker Name CSF1PO
+6 281.0 0.5 0.5
+7 285.0 0.5 0.5
+8 289.0 0.5 0.5
+9 293.0 0.5 0.5
+10 297.0 0.5 0.5
+10.2 299.0 0.5 0.5
+11 301.0 0.5 0.5
+12 305.0 0.5 0.5
+13 309.0 0.5 0.5
+14 313.0 0.5 0.5
+15 317.0 0.5 0.5
+16 321.0 0.5 0.5
+Marker Name D5S818
+6 131.0 0.5 0.5
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+17 175.0 0.5 0.5
+Marker Name D13S317
+7 202.0 0.5 0.5
+8 206.0 0.5 0.5
+9 210.0 0.5 0.5
+10 214.0 0.5 0.5
+11 218.0 0.5 0.5
+12 222.0 0.5 0.5
+13 226.0 0.5 0.5
+14 230.0 0.5 0.5
+15 234.0 0.5 0.5
+16 238.0 0.5 0.5
+Marker Name D7S820
+5 254.0 0.5 0.5
+5.2 256.0 0.5 0.5
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+16 298.0 0.5 0.5
+Panel Name Profiler_Plus_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+Marker Name FGA
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D8S1179
+7 124.0 0.5 0.5
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+20 176.0 0.5 0.5
+Marker Name D21S11
+23.2 185.0 0.5 0.5
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+38.2 245.0 0.5 0.5
+39 247.0 0.5 0.5
+Marker Name D18S51
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+Marker Name D5S818
+6 131.0 0.5 0.5
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+17 175.0 0.5 0.5
+Marker Name D13S317
+7 202.0 0.5 0.5
+8 206.0 0.5 0.5
+9 210.0 0.5 0.5
+10 214.0 0.5 0.5
+11 218.0 0.5 0.5
+12 222.0 0.5 0.5
+13 226.0 0.5 0.5
+14 230.0 0.5 0.5
+15 234.0 0.5 0.5
+16 238.0 0.5 0.5
+Marker Name D7S820
+5 254.0 0.5 0.5
+5.2 256.0 0.5 0.5
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+16 298.0 0.5 0.5
+Panel Name COfiler_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name D16S539
+5 234.0 0.5 0.5
+6 238.0 0.5 0.5
+7 242.0 0.5 0.5
+8 246.0 0.5 0.5
+9 250.0 0.5 0.5
+10 254.0 0.5 0.5
+11 258.0 0.5 0.5
+12 262.0 0.5 0.5
+12.2 264.0 0.5 0.5
+13 266.0 0.5 0.5
+14 270.0 0.5 0.5
+15 274.0 0.5 0.5
+16 278.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name TH01
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+Marker Name TPOX
+5 214.0 0.5 0.5
+6 218.0 0.5 0.5
+7 222.0 0.5 0.5
+8 226.0 0.5 0.5
+9 230.0 0.5 0.5
+10 234.0 0.5 0.5
+11 238.0 0.5 0.5
+12 242.0 0.5 0.5
+13 246.0 0.5 0.5
+14 250.0 0.5 0.5
+Marker Name CSF1PO
+6 281.0 0.5 0.5
+7 285.0 0.5 0.5
+8 289.0 0.5 0.5
+9 293.0 0.5 0.5
+10 297.0 0.5 0.5
+10.2 299.0 0.5 0.5
+11 301.0 0.5 0.5
+12 305.0 0.5 0.5
+13 309.0 0.5 0.5
+14 313.0 0.5 0.5
+15 317.0 0.5 0.5
+16 321.0 0.5 0.5
+Marker Name D7S820
+5 254.0 0.5 0.5
+5.2 256.0 0.5 0.5
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+16 298.0 0.5 0.5
+Panel Name SGM_Plus_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+23 205.0 0.5 0.5
+24 209.0 0.5 0.5
+25 213.0 0.5 0.5
+Marker Name D16S539
+5 234.0 0.5 0.5
+6 238.0 0.5 0.5
+7 242.0 0.5 0.5
+8 246.0 0.5 0.5
+9 250.0 0.5 0.5
+10 254.0 0.5 0.5
+11 258.0 0.5 0.5
+12 262.0 0.5 0.5
+12.2 264.0 0.5 0.5
+13 266.0 0.5 0.5
+14 270.0 0.5 0.5
+15 274.0 0.5 0.5
+16 278.0 0.5 0.5
+Marker Name D2S1338
+14 285.0 0.5 0.5
+15 289.0 0.5 0.5
+16 293.0 0.5 0.5
+17 297.0 0.5 0.5
+18 301.0 0.5 0.5
+19 305.0 0.5 0.5
+20 309.0 0.5 0.5
+21 313.0 0.5 0.5
+22 317.0 0.5 0.5
+23 321.0 0.5 0.5
+24 325.0 0.5 0.5
+25 329.0 0.5 0.5
+26 333.0 0.5 0.5
+27 337.0 0.5 0.5
+28 341.0 0.5 0.5
+29 345.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D8S1179
+7 124.0 0.5 0.5
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+20 176.0 0.5 0.5
+Marker Name D21S11
+23.2 185.0 0.5 0.5
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+38.2 245.0 0.5 0.5
+39 247.0 0.5 0.5
+Marker Name D18S51
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+Marker Name D19S433
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+18 142.0 0.5 0.5
+18.2 144.0 0.5 0.5
+Marker Name TH01
+3 161.0 0.5 0.5
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+Marker Name FGA
+16 211.0 0.5 0.5
+16.2 213.0 0.5 0.5
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+31.2 273.0 0.5 0.5
+32 275.0 0.5 0.5
+32.2 277.0 0.5 0.5
+33.2 281.0 0.5 0.5
+34.2 285.0 0.5 0.5
+42.2 317.0 0.5 0.5
+43.2 321.0 0.5 0.5
+44.2 325.0 0.5 0.5
+45.2 329.0 0.5 0.5
+46.2 333.0 0.5 0.5
+47.2 337.0 0.5 0.5
+48.2 341.0 0.5 0.5
+49.2 345.0 0.5 0.5
+50.2 349.0 0.5 0.5
+51.2 353.0 0.5 0.5
+Panel Name Identifiler_v1
+Marker Name D8S1179
+7 124.0 0.5 0.5
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+20 176.0 0.5 0.5
+Marker Name D21S11
+23.2 185.0 0.5 0.5
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+38.2 245.0 0.5 0.5
+39 247.0 0.5 0.5
+Marker Name D7S820
+5 254.0 0.5 0.5
+5.2 256.0 0.5 0.5
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+16 298.0 0.5 0.5
+Marker Name CSF1PO
+5 302.63 0.5 0.5
+6 306.63 0.5 0.5
+7 310.63 0.5 0.5
+8 314.63 0.5 0.5
+9 318.63 0.5 0.5
+10 322.63 0.5 0.5
+10.2 324.63 0.5 0.5
+11 326.63 0.5 0.5
+12 330.63 0.5 0.5
+13 334.63 0.5 0.5
+14 338.63 0.5 0.5
+15 342.63 0.5 0.5
+16 346.63 0.5 0.5
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name TH01
+3 161.0 0.5 0.5
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+Marker Name D13S317
+7 213.65 0.5 0.5
+8 217.65 0.5 0.5
+9 221.65 0.5 0.5
+10 225.65 0.5 0.5
+11 229.65 0.5 0.5
+12 233.65 0.5 0.5
+13 237.65 0.5 0.5
+14 241.65 0.5 0.5
+15 245.65 0.5 0.5
+16 249.65 0.5 0.5
+Marker Name D16S539
+5 257.3 0.5 0.5
+6 261.3 0.5 0.5
+7 265.3 0.5 0.5
+8 269.3 0.5 0.5
+9 273.3 0.5 0.5
+10 277.3 0.5 0.5
+11 281.3 0.5 0.5
+12 285.3 0.5 0.5
+12.2 287.3 0.5 0.5
+13 289.3 0.5 0.5
+14 293.3 0.5 0.5
+15 297.3 0.5 0.5
+16 301.3 0.5 0.5
+Marker Name D2S1338
+14 305.31 0.5 0.5
+15 309.31 0.5 0.5
+16 313.31 0.5 0.5
+17 317.31 0.5 0.5
+18 319.31 0.5 0.5
+19 325.31 0.5 0.5
+20 329.31 0.5 0.5
+21 333.31 0.5 0.5
+22 337.31 0.5 0.5
+23 341.31 0.5 0.5
+24 345.31 0.5 0.5
+25 349.31 0.5 0.5
+26 353.31 0.5 0.5
+27 357.31 0.5 0.5
+28 361.31 0.5 0.5
+29 365.31 0.5 0.5
+Marker Name D19S433
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+18 142.0 0.5 0.5
+18.2 144.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+23 205.0 0.5 0.5
+24 209.0 0.5 0.5
+25 213.0 0.5 0.5
+Marker Name TPOX
+5 220.99 0.5 0.5
+6 224.99 0.5 0.5
+7 228.99 0.5 0.5
+8 232.99 0.5 0.5
+9 236.99 0.5 0.5
+10 240.99 0.5 0.5
+11 244.99 0.5 0.5
+12 248.99 0.5 0.5
+13 252.99 0.5 0.5
+14 256.99 0.5 0.5
+Marker Name D18S51
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D5S818
+6 131.0 0.5 0.5
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+17 175.0 0.5 0.5
+Marker Name FGA
+16 211.0 0.5 0.5
+16.2 213.0 0.5 0.5
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+31.2 273.0 0.5 0.5
+32 275.0 0.5 0.5
+32.2 277.0 0.5 0.5
+33.2 281.0 0.5 0.5
+34.2 285.0 0.5 0.5
+42.2 317.0 0.5 0.5
+43.2 321.0 0.5 0.5
+44.2 325.0 0.5 0.5
+45.2 329.0 0.5 0.5
+46.2 333.0 0.5 0.5
+47.2 337.0 0.5 0.5
+48.2 341.0 0.5 0.5
+49.2 345.0 0.5 0.5
+50.2 349.0 0.5 0.5
+51.2 353.0 0.5 0.5
+Panel Name SEfiler_v1
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name vWA
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+23 205.0 0.5 0.5
+24 209.0 0.5 0.5
+25 213.0 0.5 0.5
+Marker Name D16S539
+5 234.0 0.5 0.5
+6 238.0 0.5 0.5
+7 242.0 0.5 0.5
+8 246.0 0.5 0.5
+9 250.0 0.5 0.5
+10 254.0 0.5 0.5
+11 258.0 0.5 0.5
+12 262.0 0.5 0.5
+12.2 264.0 0.5 0.5
+13 266.0 0.5 0.5
+14 270.0 0.5 0.5
+15 274.0 0.5 0.5
+16 278.0 0.5 0.5
+Marker Name D2S1338
+14 285.0 0.5 0.5
+15 289.0 0.5 0.5
+16 293.0 0.5 0.5
+17 297.0 0.5 0.5
+18 301.0 0.5 0.5
+19 305.0 0.5 0.5
+20 309.0 0.5 0.5
+21 313.0 0.5 0.5
+22 317.0 0.5 0.5
+23 321.0 0.5 0.5
+24 325.0 0.5 0.5
+25 329.0 0.5 0.5
+26 333.0 0.5 0.5
+27 337.0 0.5 0.5
+28 341.0 0.5 0.5
+29 345.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D8S1179
+7 124.0 0.5 0.5
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+20 176.0 0.5 0.5
+Marker Name SE33
+4.2 203.0 0.5 0.5
+5 205.0 0.5 0.5
+6 209.0 0.5 0.5
+6.3 212.0 0.5 0.4
+7 213.0 0.4 0.5
+8 217.0 0.5 0.5
+8.2 219.0 0.5 0.5
+9 221.0 0.5 0.5
+9.2 223.0 0.5 0.5
+10 225.0 0.5 0.5
+10.2 227.0 0.5 0.5
+11 229.0 0.5 0.5
+11.2 231.0 0.5 0.5
+12 233.0 0.5 0.5
+12.2 235.0 0.5 0.5
+13 237.0 0.5 0.5
+13.2 239.0 0.5 0.5
+14 241.0 0.5 0.5
+14.2 243.0 0.5 0.5
+15 245.0 0.5 0.5
+15.2 247.0 0.5 0.5
+16 249.0 0.5 0.5
+16.2 251.0 0.5 0.5
+17 253.0 0.5 0.5
+17.2 255.0 0.5 0.5
+18 257.0 0.5 0.5
+18.2 259.0 0.5 0.5
+19 261.0 0.5 0.5
+19.2 263.0 0.5 0.5
+20 265.0 0.5 0.5
+20.2 267.0 0.5 0.5
+21 269.0 0.5 0.4
+21.1 270.0 0.4 0.4
+21.2 271.0 0.4 0.5
+22 273.0 0.5 0.5
+22.2 275.0 0.5 0.5
+23 277.0 0.5 0.5
+23.2 279.0 0.5 0.5
+24 281.0 0.5 0.5
+24.2 283.0 0.5 0.5
+25 285.0 0.5 0.5
+25.2 287.0 0.5 0.5
+26 289.0 0.5 0.5
+26.2 291.0 0.5 0.5
+27 293.0 0.5 0.5
+27.2 295.0 0.5 0.5
+28 297.0 0.5 0.5
+28.2 299.0 0.5 0.5
+29 301.0 0.5 0.5
+29.2 303.0 0.5 0.5
+30 305.0 0.5 0.5
+30.2 307.0 0.5 0.5
+31 309.0 0.5 0.5
+31.2 311.0 0.5 0.5
+32 313.0 0.5 0.5
+32.2 315.0 0.5 0.5
+33 317.0 0.5 0.5
+33.2 319.0 0.5 0.5
+34 321.0 0.5 0.5
+34.2 323.0 0.5 0.5
+35 325.0 0.5 0.5
+35.2 327.0 0.5 0.5
+36 329.0 0.5 0.5
+36.2 331.0 0.5 0.5
+37 333.0 0.5 0.5
+37.2 335.0 0.5 0.5
+38 337.0 0.5 0.5
+Marker Name D19S433
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+18 142.0 0.5 0.5
+18.2 144.0 0.5 0.5
+Marker Name TH01
+3 161.0 0.5 0.5
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+Marker Name FGA
+16 211.0 0.5 0.5
+16.2 213.0 0.5 0.5
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+31.2 273.0 0.5 0.5
+32 275.0 0.5 0.5
+32.2 277.0 0.5 0.5
+33.2 281.0 0.5 0.5
+34.2 285.0 0.5 0.5
+42.2 317.0 0.5 0.5
+43.2 321.0 0.5 0.5
+44.2 325.0 0.5 0.5
+45.2 329.0 0.5 0.5
+46.2 333.0 0.5 0.5
+47.2 337.0 0.5 0.5
+48.2 341.0 0.5 0.5
+49.2 345.0 0.5 0.5
+50.2 349.0 0.5 0.5
+51.2 353.0 0.5 0.5
+Marker Name D21S11
+23.2 185.0 0.5 0.5
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+38.2 245.0 0.5 0.5
+39 247.0 0.5 0.5
+Marker Name D18S51
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+Panel Name Profiler_Plus_CODIS_v1
+Marker Name D3S1358
+<12 113.49 15.49 0.0
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+>19 142.51 0.0 5.49
+Marker Name vWA
+<11 156.49 5.49 0.0
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+>21 197.51 0.0 5.49
+Marker Name FGA
+<18 218.49 12.24 0.0
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+>30 267.51 0.0 92.49
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D8S1179
+<8 127.49 9.49 0.0
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+>19 172.51 0.0 10.99
+Marker Name D21S11
+<24.2 188.49 3.99 0.0
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+>38 243.51 0.0 3.99
+Marker Name D18S51
+<9 272.49 8.0 0.0
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+>26 341.51 0.0 8.49
+Marker Name D5S818
+<7 134.49 6.49 0.0
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+>16 171.51 0.0 8.49
+Marker Name D13S317
+<8 205.49 13.49 0.0
+8 206.0 0.5 0.5
+9 210.0 0.5 0.5
+10 214.0 0.5 0.5
+11 218.0 0.5 0.5
+12 222.0 0.5 0.5
+13 226.0 0.5 0.5
+14 230.0 0.5 0.5
+15 234.0 0.5 0.5
+>15 234.51 0.0 7.49
+Marker Name D7S820
+<6 257.49 6.49 0.0
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+>15 294.51 0.0 3.99
+Panel Name COfiler_CODIS_v1
+Marker Name D3S1358
+<12 113.49 15.49 0.0
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+>19 142.51 0.0 5.49
+Marker Name D16S539
+<5 233.49 4.49 0.0
+5 234.0 0.5 0.5
+6 238.0 0.5 0.5
+7 242.0 0.5 0.5
+8 246.0 0.5 0.5
+9 250.0 0.5 0.5
+10 254.0 0.5 0.5
+11 258.0 0.5 0.5
+12 262.0 0.5 0.5
+12.2 264.0 0.5 0.5
+13 266.0 0.5 0.5
+14 270.0 0.5 0.5
+15 274.0 0.5 0.5
+>15 274.51 0.0 4.49
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name TH01
+<5 168.49 9.49 0.0
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+>10 189.51 0.0 15.49
+Marker Name TPOX
+<6 217.49 5.49 0.0
+6 218.0 0.5 0.5
+7 222.0 0.5 0.5
+8 226.0 0.5 0.5
+9 230.0 0.5 0.5
+10 234.0 0.5 0.5
+11 238.0 0.5 0.5
+12 242.0 0.5 0.5
+13 246.0 0.5 0.5
+>13 246.51 0.0 7.49
+Marker Name CSF1PO
+<6 280.49 5.49 0.0
+6 281.0 0.5 0.5
+7 285.0 0.5 0.5
+8 289.0 0.5 0.5
+9 293.0 0.5 0.5
+10 297.0 0.5 0.5
+10.2 299.0 0.5 0.5
+11 301.0 0.5 0.5
+12 305.0 0.5 0.5
+13 309.0 0.5 0.5
+14 313.0 0.5 0.5
+15 317.0 0.5 0.5
+>15 317.51 0.0 5.49
+Marker Name D7S820
+<6 257.49 6.49 0.0
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+>15 294.51 0.0 3.99
+Panel Name Identifiler_CODIS_v1
+Marker Name D8S1179
+<8 127.49 9.49 0.0
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+>19 172.51 0.0 10.99
+Marker Name D21S11
+<24.2 188.49 3.99 0.0
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+>38 243.51 0.0 3.99
+Marker Name D7S820
+<6 257.49 6.49 0.0
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+>15 294.51 0.0 3.99
+Marker Name CSF1PO
+<6 306.12 4.0 0.0
+6 306.63 0.5 0.5
+7 310.63 0.5 0.5
+8 314.63 0.5 0.5
+9 318.63 0.5 0.5
+10 322.63 0.5 0.5
+10.2 324.63 0.5 0.5
+11 326.63 0.5 0.5
+12 330.63 0.5 0.5
+13 334.63 0.5 0.5
+14 338.63 0.5 0.5
+15 342.63 0.5 0.5
+>15 343.14 0.0 5.49
+Marker Name D3S1358
+<12 113.49 15.49 0.0
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+>19 142.51 0.0 5.49
+Marker Name TH01
+<4 164.49 5.49 0.0
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+>13.3 204.51 0.0 0.49
+Marker Name D13S317
+<8 217.14 11.48 0.0
+8 217.65 0.5 0.5
+9 221.65 0.5 0.5
+10 225.65 0.5 0.5
+11 229.65 0.5 0.5
+12 233.65 0.5 0.5
+13 237.65 0.5 0.5
+14 241.65 0.5 0.5
+15 245.65 0.5 0.5
+>15 246.16 0.0 4.0
+Marker Name D16S539
+<5 256.79 1.49 0.0
+5 257.3 0.5 0.5
+6 261.3 0.5 0.5
+7 265.3 0.5 0.5
+8 269.3 0.5 0.5
+9 273.3 0.5 0.5
+10 277.3 0.5 0.5
+11 281.3 0.5 0.5
+12 285.3 0.5 0.5
+12.2 287.3 0.5 0.5
+13 289.3 0.5 0.5
+14 293.3 0.5 0.5
+15 297.3 0.5 0.5
+>15 297.81 0.0 4.0
+Marker Name D2S1338
+<15 308.8 4.0 0.0
+15 309.31 0.5 0.5
+16 313.31 0.5 0.5
+17 317.31 0.5 0.5
+18 319.31 0.5 0.5
+19 325.31 0.5 0.5
+20 329.31 0.5 0.5
+21 333.31 0.5 0.5
+22 337.31 0.5 0.5
+23 341.31 0.5 0.5
+24 345.31 0.5 0.5
+25 349.31 0.5 0.5
+26 353.31 0.5 0.5
+27 357.31 0.5 0.5
+28 361.31 0.5 0.5
+>28 361.82 0.0 8.49
+Marker Name D19S433
+<9 105.49 4.48 0.0
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+>17.2 140.51 0.0 7.49
+Marker Name vWA
+<11 156.49 5.49 0.0
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+>21 197.51 0.0 15.99
+Marker Name TPOX
+<6 224.48 7.48 0.0
+6 224.99 0.5 0.5
+7 228.99 0.5 0.5
+8 232.99 0.5 0.5
+9 236.99 0.5 0.5
+10 240.99 0.5 0.5
+11 244.99 0.5 0.5
+12 248.99 0.5 0.5
+13 252.99 0.5 0.5
+>13 253.5 0.0 7.49
+Marker Name D18S51
+<7 264.49 0.0 0.0
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+23 329.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+>26 341.51 0.0 8.49
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D5S818
+<7 134.49 6.49 0.0
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+>16 171.51 0.0 8.49
+Marker Name FGA
+<18 218.49 12.24 0.0
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+>30 267.51 0.0 92.49
+Panel Name Identifiler_v1-dup
+Marker Name D8S1179
+7 124.0 0.5 0.5
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+20 176.0 0.5 0.5
+Marker Name D21S11
+23.2 185.0 0.5 0.5
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+38.2 245.0 0.5 0.5
+39 247.0 0.5 0.5
+Marker Name D7S820
+5 254.0 0.5 0.5
+5.2 256.0 0.5 0.5
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+16 298.0 0.5 0.5
+Marker Name CSF1PO
+15 342.63 0.5 0.5
+16 346.63 0.5 0.5
+5 302.63 0.5 0.5
+6 306.63 0.5 0.5
+7 310.63 0.5 0.5
+8 314.63 0.5 0.5
+9 318.63 0.5 0.5
+10 322.63 0.5 0.5
+10.2 324.63 0.5 0.5
+11 326.63 0.5 0.5
+12 330.63 0.5 0.5
+13 334.63 0.5 0.5
+14 338.63 0.5 0.5
+Marker Name D3S1358
+11 110.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+20 146.0 0.5 0.5
+Marker Name TH01
+3 161.0 0.5 0.5
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+Marker Name D13S317
+7 213.65 0.5 0.5
+8 217.65 0.5 0.5
+9 221.65 0.5 0.5
+10 225.65 0.5 0.5
+11 229.65 0.5 0.5
+12 233.65 0.5 0.5
+13 237.65 0.5 0.5
+14 241.65 0.5 0.5
+15 245.65 0.5 0.5
+16 249.65 0.5 0.5
+Marker Name D16S539
+5 257.3 0.5 0.5
+6 261.3 0.5 0.5
+7 265.3 0.5 0.5
+8 269.3 0.5 0.5
+9 273.3 0.5 0.5
+10 277.3 0.5 0.5
+11 281.3 0.5 0.5
+12 285.3 0.5 0.5
+12.2 287.3 0.5 0.5
+13 289.3 0.5 0.5
+14 293.3 0.5 0.5
+15 297.3 0.5 0.5
+16 301.3 0.5 0.5
+Marker Name D2S1338
+14 305.31 0.5 0.5
+15 309.31 0.5 0.5
+16 313.31 0.5 0.5
+17 317.31 0.5 0.5
+18 319.31 0.5 0.5
+19 325.31 0.5 0.5
+20 329.31 0.5 0.5
+21 333.31 0.5 0.5
+22 337.31 0.5 0.5
+23 341.31 0.5 0.5
+24 345.31 0.5 0.5
+25 349.31 0.5 0.5
+26 353.31 0.5 0.5
+27 357.31 0.5 0.5
+28 361.31 0.5 0.5
+29 365.31 0.5 0.5
+Marker Name D19S433
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+18 142.0 0.5 0.5
+18.2 144.0 0.5 0.5
+Marker Name vWA
+22 201.0 0.5 0.5
+23 205.0 0.5 0.5
+24 209.0 0.5 0.5
+25 213.0 0.5 0.5
+10 153.0 0.5 0.5
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+Marker Name TPOX
+5 220.99 0.5 0.5
+6 224.99 0.5 0.5
+7 228.99 0.5 0.5
+8 232.99 0.5 0.5
+9 236.99 0.5 0.5
+10 240.99 0.5 0.5
+11 244.99 0.5 0.5
+12 248.99 0.5 0.5
+13 252.99 0.5 0.5
+14 256.99 0.5 0.5
+Marker Name D18S51
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D5S818
+6 131.0 0.5 0.5
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+17 175.0 0.5 0.5
+Marker Name FGA
+49.2 345.0 0.5 0.5
+50.2 349.0 0.5 0.5
+51.2 353.0 0.5 0.5
+16 211.0 0.5 0.5
+16.2 213.0 0.5 0.5
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+31.2 273.0 0.5 0.5
+32 275.0 0.5 0.5
+32.2 277.0 0.5 0.5
+33.2 281.0 0.5 0.5
+34.2 285.0 0.5 0.5
+42.2 317.0 0.5 0.5
+43.2 321.0 0.5 0.5
+44.2 325.0 0.5 0.5
+45.2 329.0 0.5 0.5
+46.2 333.0 0.5 0.5
+47.2 337.0 0.5 0.5
+48.2 341.0 0.5 0.5
+Panel Name PowerPlex_16_v1
+Marker Name D3S1358
+18 136.0 0.5 0.5
+19 140.0 0.5 0.5
+20 144.0 0.5 0.5
+21 148.0 0.5 0.5
+11 108.0 0.5 0.5
+12 112.0 0.5 0.5
+13 116.0 0.5 0.5
+14 120.0 0.5 0.5
+15 124.0 0.5 0.5
+15.2 126.0 0.5 0.5
+16 128.0 0.5 0.5
+17 132.0 0.5 0.5
+Marker Name TH01
+4 154.0 0.5 0.5
+5 158.0 0.5 0.5
+6 162.0 0.5 0.5
+7 166.0 0.5 0.5
+8 170.0 0.5 0.5
+9 174.0 0.5 0.5
+9.3 177.0 0.5 0.4
+10 178.0 0.5 0.5
+11 182.0 0.5 0.5
+13.3 193.0 0.5 0.5
+Marker Name D21S11
+23.2 198.0 0.5 0.5
+24 200.0 0.5 0.5
+24.2 202.0 0.5 0.5
+25 204.0 0.5 0.5
+25.2 206.0 0.5 0.5
+26 208.0 0.5 0.5
+26.2 210.0 0.5 0.5
+27 212.0 0.5 0.5
+27.2 214.0 0.5 0.5
+28 216.0 0.5 0.5
+28.2 218.0 0.5 0.5
+29 220.0 0.5 0.5
+29.2 222.0 0.5 0.5
+30 224.0 0.5 0.5
+30.2 226.0 0.5 0.5
+31 228.0 0.5 0.5
+31.2 230.0 0.5 0.5
+32 232.0 0.5 0.5
+32.2 234.0 0.5 0.5
+33 236.0 0.5 0.5
+33.2 238.0 0.5 0.5
+34 240.0 0.5 0.5
+34.2 242.0 0.5 0.5
+35 244.0 0.5 0.5
+35.2 246.0 0.5 0.5
+36 248.0 0.5 0.5
+36.2 250.0 0.5 0.5
+37 252.0 0.5 0.5
+37.2 254.0 0.5 0.5
+38 256.0 0.5 0.5
+38.2 258.0 0.5 0.5
+39 260.0 0.5 0.5
+Marker Name D18S51
+8 285.0 0.5 0.5
+9 289.0 0.5 0.5
+9.2 291.0 0.5 0.5
+10 293.0 0.5 0.5
+10.2 295.0 0.5 0.5
+11 297.0 0.5 0.5
+12 301.0 0.5 0.5
+13 305.0 0.5 0.5
+13.2 307.0 0.5 0.5
+14 309.0 0.5 0.5
+14.2 311.0 0.5 0.5
+15 313.0 0.5 0.5
+16 317.0 0.5 0.5
+17 321.0 0.5 0.5
+18 325.0 0.5 0.5
+19 329.0 0.5 0.5
+19.2 331.0 0.5 0.5
+20 333.0 0.5 0.5
+21 337.0 0.5 0.5
+22 341.0 0.5 0.5
+23 345.0 0.5 0.5
+24 349.0 0.5 0.5
+25 353.0 0.5 0.5
+26 357.0 0.5 0.5
+27 361.0 0.5 0.5
+Marker Name Penta_E
+7 387.0 0.5 0.5
+8 392.0 0.5 0.5
+9 397.0 0.5 0.5
+10 402.0 0.5 0.5
+11 407.0 0.5 0.5
+12 412.0 0.5 0.5
+13 417.0 0.5 0.5
+14 422.0 0.5 0.5
+15 427.0 0.5 0.5
+16 432.0 0.5 0.5
+17 437.0 0.5 0.5
+18 442.0 0.5 0.5
+19 447.0 0.5 0.5
+20 452.0 0.5 0.5
+21 457.0 0.5 0.5
+22 462.0 0.5 0.5
+23 467.0 0.5 0.5
+24 472.0 0.5 0.5
+25 477.0 0.5 0.5
+4 372.0 0.5 0.5
+5 377.0 0.5 0.5
+6 382.0 0.5 0.5
+Marker Name D5S818
+7 115.0 0.5 0.5
+8 119.0 0.5 0.5
+9 123.0 0.5 0.5
+10 127.0 0.5 0.5
+11 131.0 0.5 0.5
+12 135.0 0.5 0.5
+13 139.0 0.5 0.5
+14 143.0 0.5 0.5
+15 147.0 0.5 0.5
+16 151.0 0.5 0.5
+Marker Name D13S317
+7 173.0 0.5 0.5
+8 177.0 0.5 0.5
+9 181.0 0.5 0.5
+10 185.0 0.5 0.5
+11 189.0 0.5 0.5
+12 193.0 0.5 0.5
+13 197.0 0.5 0.5
+14 201.0 0.5 0.5
+15 205.0 0.5 0.5
+Marker Name D7S820
+6 213.0 0.5 0.5
+7 217.0 0.5 0.5
+8 221.0 0.5 0.5
+9 225.0 0.5 0.5
+10 229.0 0.5 0.5
+11 233.0 0.5 0.5
+12 237.0 0.5 0.5
+13 241.0 0.5 0.5
+14 245.0 0.5 0.5
+Marker Name D16S539
+5 263.0 0.5 0.5
+8 275.0 0.5 0.5
+9 279.0 0.5 0.5
+10 283.0 0.5 0.5
+11 287.0 0.5 0.5
+12 291.0 0.5 0.5
+13 295.0 0.5 0.5
+14 299.0 0.5 0.5
+15 303.0 0.5 0.5
+Marker Name CSF1PO
+6 318.0 0.5 0.5
+7 322.0 0.5 0.5
+8 326.0 0.5 0.5
+9 330.0 0.5 0.5
+10 334.0 0.5 0.5
+11 338.0 0.5 0.5
+12 342.0 0.5 0.5
+13 346.0 0.5 0.5
+14 350.0 0.5 0.5
+15 354.0 0.5 0.5
+Marker Name Penta_D
+2.2 369.0 0.5 0.5
+3.2 374.0 0.5 0.5
+5 382.0 0.5 0.5
+7 392.0 0.5 0.5
+8 397.0 0.5 0.5
+9 402.0 0.5 0.5
+10 407.0 0.5 0.5
+11 412.0 0.5 0.5
+12 417.0 0.5 0.5
+13 422.0 0.5 0.5
+14 427.0 0.5 0.5
+15 432.0 0.5 0.5
+16 437.0 0.5 0.5
+17 442.0 0.5 0.5
+Marker Name AMEL
+X 104.0 0.5 0.5
+Y 110.0 0.5 0.5
+Marker Name vWA
+22 170.0 0.5 0.5
+10 122.0 0.5 0.5
+11 126.0 0.5 0.5
+12 130.0 0.5 0.5
+13 134.0 0.5 0.5
+14 138.0 0.5 0.5
+15 142.0 0.5 0.5
+15.2 144.0 0.5 0.5
+16 146.0 0.5 0.5
+17 150.0 0.5 0.5
+18 154.0 0.5 0.5
+19 158.0 0.5 0.5
+20 162.0 0.5 0.5
+21 166.0 0.5 0.5
+Marker Name D8S1179
+7 202.0 0.5 0.5
+8 206.0 0.5 0.5
+9 210.0 0.5 0.5
+10 214.0 0.5 0.5
+11 218.0 0.5 0.5
+12 222.0 0.5 0.5
+13 226.0 0.5 0.5
+14 230.0 0.5 0.5
+15 234.0 0.5 0.5
+16 238.0 0.5 0.5
+17 242.0 0.5 0.5
+18 246.0 0.5 0.5
+19 250.0 0.5 0.5
+Marker Name TPOX
+5 257.0 0.5 0.5
+6 261.0 0.5 0.5
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+10 277.0 0.5 0.5
+11 281.0 0.5 0.5
+12 285.0 0.5 0.5
+13 289.0 0.5 0.5
+14 293.0 0.5 0.5
+Marker Name FGA
+16 320.0 0.5 0.5
+17 324.0 0.5 0.5
+17.2 326.0 0.5 0.5
+18 328.0 0.5 0.5
+18.2 330.0 0.5 0.5
+19 332.0 0.5 0.5
+19.2 334.0 0.5 0.5
+20 336.0 0.5 0.5
+20.2 338.0 0.5 0.5
+21 340.0 0.5 0.5
+21.2 342.0 0.5 0.5
+22 344.0 0.5 0.5
+22.2 346.0 0.5 0.5
+23 348.0 0.5 0.5
+23.2 350.0 0.5 0.5
+24 352.0 0.5 0.5
+24.2 354.0 0.5 0.5
+25 356.0 0.5 0.5
+25.2 358.0 0.5 0.5
+26 360.0 0.5 0.5
+26.2 362.0 0.5 0.5
+27 364.0 0.5 0.5
+27.2 366.0 0.5 0.5
+28 368.0 0.5 0.5
+28.2 370.0 0.5 0.5
+29 372.0 0.5 0.5
+29.2 374.0 0.5 0.5
+30 376.0 0.5 0.5
+30.2 378.0 0.5 0.5
+31 380.0 0.5 0.5
+31.2 382.0 0.5 0.5
+43.2 431.0 0.5 0.5
+44.2 435.0 0.5 0.5
+45.2 439.0 0.5 0.5
+46.2 443.0 0.5 0.5
+47.2 447.0 0.5 0.5
+Panel Name PowerPlex_12_v1
+Marker Name D5S818
+7 114.0 0.5 0.5
+8 118.0 0.5 0.5
+9 122.0 0.5 0.5
+10 126.0 0.5 0.5
+11 130.0 0.5 0.5
+12 134.0 0.5 0.5
+13 138.0 0.5 0.5
+14 142.0 0.5 0.5
+15 146.0 0.5 0.5
+16 150.0 0.5 0.5
+Marker Name D13S317
+7 170.0 0.5 0.5
+8 174.0 0.5 0.5
+9 178.0 0.5 0.5
+10 182.0 0.5 0.5
+11 186.0 0.5 0.5
+12 190.0 0.5 0.5
+13 194.0 0.5 0.5
+14 198.0 0.5 0.5
+15 202.0 0.5 0.5
+Marker Name D7S820
+6 213.0 0.5 0.5
+7 217.0 0.5 0.5
+8 221.0 0.5 0.5
+9 225.0 0.5 0.5
+10 229.0 0.5 0.5
+11 233.0 0.5 0.5
+12 237.0 0.5 0.5
+13 241.0 0.5 0.5
+14 245.0 0.5 0.5
+Marker Name D16S539
+5 262.0 0.5 0.5
+6 266.0 0.5 0.5
+7 270.0 0.5 0.5
+8 274.0 0.5 0.5
+9 278.0 0.5 0.5
+10 282.0 0.5 0.5
+11 286.0 0.5 0.5
+12 290.0 0.5 0.5
+13 294.0 0.5 0.5
+14 298.0 0.5 0.5
+15 302.0 0.5 0.5
+Marker Name vWA
+10 122.0 0.5 0.5
+11 126.0 0.5 0.5
+12 130.0 0.5 0.5
+13 134.0 0.5 0.5
+14 138.0 0.5 0.5
+15 142.0 0.5 0.5
+15.2 144.0 0.5 0.5
+16 146.0 0.5 0.5
+17 150.0 0.5 0.5
+18 154.0 0.5 0.5
+19 158.0 0.5 0.5
+20 162.0 0.5 0.5
+21 166.0 0.5 0.5
+Marker Name TH01
+5 176.0 0.5 0.5
+6 180.0 0.5 0.5
+7 184.0 0.5 0.5
+8 188.0 0.5 0.5
+9 192.0 0.5 0.5
+9.3 195.0 0.5 0.4
+10 196.0 0.5 0.5
+11 200.0 0.5 0.5
+Marker Name AMEL
+X 210.0 0.5 0.5
+Y 216.0 0.5 0.5
+Marker Name TPOX
+6 222.0 0.5 0.5
+7 226.0 0.5 0.5
+8 230.0 0.5 0.5
+9 234.0 0.5 0.5
+10 238.0 0.5 0.5
+11 242.0 0.5 0.5
+12 246.0 0.5 0.5
+13 250.0 0.5 0.5
+14 254.0 0.5 0.5
+Marker Name CSF1PO
+6 291.0 0.5 0.5
+7 295.0 0.5 0.5
+8 299.0 0.5 0.5
+9 303.0 0.5 0.5
+10 307.0 0.5 0.5
+11 311.0 0.5 0.5
+12 315.0 0.5 0.5
+13 319.0 0.5 0.5
+14 323.0 0.5 0.5
+15 327.0 0.5 0.5
+Panel Name Penta_E_v1
+Marker Name Penta_E
+4 372.0 0.5 0.5
+5 377.0 0.5 0.5
+6 382.0 0.5 0.5
+7 387.0 0.5 0.5
+8 392.0 0.5 0.5
+9 397.0 0.5 0.5
+10 402.0 0.5 0.5
+11 407.0 0.5 0.5
+12 412.0 0.5 0.5
+13 417.0 0.5 0.5
+14 422.0 0.5 0.5
+15 427.0 0.5 0.5
+16 432.0 0.5 0.5
+17 437.0 0.5 0.5
+18 442.0 0.5 0.5
+19 447.0 0.5 0.5
+20 452.0 0.5 0.5
+21 457.0 0.5 0.5
+22 462.0 0.5 0.5
+23 467.0 0.5 0.5
+24 472.0 0.5 0.5
+25 477.0 0.5 0.5
+Panel Name Penta_D_v1
+Marker Name Penta_D
+9 402.0 0.5 0.5
+10 407.0 0.5 0.5
+11 412.0 0.5 0.5
+12 417.0 0.5 0.5
+13 422.0 0.5 0.5
+14 427.0 0.5 0.5
+15 432.0 0.5 0.5
+16 437.0 0.5 0.5
+17 442.0 0.5 0.5
+2.2 369.0 0.5 0.5
+3.2 374.0 0.5 0.5
+5 382.0 0.5 0.5
+7 392.0 0.5 0.5
+8 397.0 0.5 0.5
+Panel Name SE33_v1
+Marker Name SE33
+4.2 196.0 0.5 0.5
+6.3 205.0 0.5 0.5
+7.2 208.0 0.5 0.5
+8 210.0 0.5 0.5
+8.2 212.0 0.5 0.5
+9 214.0 0.5 0.5
+9.2 216.0 0.5 0.5
+10 218.0 0.5 0.5
+10.2 220.0 0.5 0.5
+11 222.0 0.5 0.5
+11.2 224.0 0.5 0.5
+12 226.0 0.5 0.5
+12.2 228.0 0.5 0.5
+13 230.0 0.5 0.5
+13.2 232.0 0.5 0.5
+14 234.0 0.5 0.5
+14.2 236.0 0.5 0.4
+14.3 237.0 0.4 0.4
+15 238.0 0.4 0.5
+15.2 240.0 0.5 0.5
+16 242.0 0.5 0.5
+16.2 244.0 0.5 0.5
+17 246.0 0.5 0.5
+17.2 248.0 0.5 0.5
+18 250.0 0.5 0.5
+18.2 252.0 0.5 0.5
+19 254.0 0.5 0.5
+19.2 256.0 0.5 0.5
+20 258.0 0.5 0.5
+20.2 260.0 0.5 0.5
+21 262.0 0.5 0.4
+21.1 263.0 0.4 0.4
+21.2 264.0 0.4 0.5
+22 266.0 0.5 0.5
+22.2 268.0 0.5 0.5
+23 270.0 0.5 0.5
+23.2 272.0 0.5 0.5
+24 274.0 0.5 0.5
+24.2 276.0 0.5 0.5
+25 278.0 0.5 0.5
+25.2 280.0 0.5 0.5
+26 282.0 0.5 0.5
+26.2 284.0 0.5 0.5
+27 286.0 0.5 0.5
+27.2 288.0 0.5 0.5
+28 290.0 0.5 0.5
+28.2 292.0 0.5 0.5
+29 294.0 0.5 0.5
+29.2 296.0 0.5 0.5
+30 298.0 0.5 0.5
+30.2 300.0 0.5 0.5
+31 302.0 0.5 0.5
+31.2 304.0 0.5 0.5
+32 306.0 0.5 0.4
+32.1 307.0 0.4 0.4
+32.2 308.0 0.4 0.5
+33 310.0 0.5 0.5
+33.2 312.0 0.5 0.5
+34 314.0 0.5 0.5
+34.2 316.0 0.5 0.5
+35 318.0 0.5 0.5
+35.2 320.0 0.5 0.5
+36 322.0 0.5 0.5
+36.2 324.0 0.5 0.5
+37 326.0 0.5 0.5
+37.2 328.0 0.5 0.5
+38 330.0 0.5 0.5
+Panel Name FFFL_v1
+Marker Name LPL
+7 97.7 0.5 0.5
+8 101.7 0.5 0.5
+9 105.7 0.5 0.5
+10 109.7 0.5 0.5
+11 113.7 0.5 0.5
+12 117.7 0.5 0.5
+13 121.7 0.5 0.5
+14 125.7 0.5 0.5
+Marker Name F13B
+6 164.3 0.5 0.5
+7 168.3 0.5 0.5
+8 172.3 0.5 0.5
+9 176.3 0.5 0.5
+10 180.3 0.5 0.5
+11 184.2 0.5 0.5
+12 188.19 0.5 0.5
+Marker Name FESFPS
+7 218.0 0.5 0.5
+8 222.0 0.5 0.5
+9 226.0 0.5 0.5
+10 230.0 0.5 0.5
+11 234.0 0.5 0.5
+12 238.0 0.5 0.5
+13 242.0 0.5 0.5
+14 246.04 0.5 0.5
+Marker Name F13A01
+3.2 276.68 0.5 0.5
+4 278.7 0.5 0.5
+5 282.7 0.5 0.5
+6 286.7 0.5 0.5
+7 290.7 0.5 0.5
+8 294.7 0.5 0.5
+9 298.7 0.5 0.5
+10 302.27 0.5 0.5
+11 305.7 0.5 0.5
+12 309.4 0.5 0.5
+13 313.3 0.5 0.5
+14 317.2 0.5 0.5
+15 321.2 0.5 0.5
+16 325.1 0.5 0.5
+Panel Name CTTV_v1
+Marker Name vWA
+13 139.0 0.5 0.5
+14 143.0 0.5 0.5
+15 147.0 0.5 0.5
+16 151.0 0.5 0.5
+17 155.0 0.5 0.5
+18 159.0 0.5 0.5
+19 163.0 0.5 0.5
+20 167.0 0.5 0.5
+Marker Name TH01
+8 191.4 0.5 0.5
+9 195.4 0.5 0.5
+10 199.4 0.5 0.5
+11 202.97 0.5 0.5
+6 183.4 0.5 0.5
+7 187.4 0.5 0.5
+5 179.4 0.5 0.5
+Marker Name AMEL
+x 212.08 0.5 0.5
+Y 217.38 0.5 0.5
+Marker Name TPOX
+6 224.0 0.5 0.5
+7 228.0 0.5 0.5
+8 232.0 0.5 0.5
+9 236.0 0.5 0.5
+10 240.0 0.5 0.5
+11 244.0 0.5 0.5
+12 248.0 0.5 0.5
+13 252.03 0.5 0.5
+Marker Name CSF1PO
+7 294.5 0.5 0.5
+8 298.5 0.5 0.5
+9 302.5 0.5 0.5
+10 306.5 0.5 0.5
+11 310.5 0.5 0.5
+12 314.5 0.5 0.5
+13 318.5 0.5 0.5
+14 322.5 0.5 0.5
+15 326.32 0.5 0.5
+Panel Name NextGen_GS500
+Marker Name D10
+7 73 0.5 0.5 mutant
+8 77 0.5 0.5
+9 81 0.5 0.5
+10 85 0.5 0.5
+11 89 0.5 0.5
+12 93 0.5 0.5
+13 97 0.5 0.5
+14 101 0.5 0.5
+15 105 0.5 0.5
+16 109 0.5 0.5
+17 113 0.5 0.5
+18 117 0.5 0.5
+19 121 0.5 0.5 mutant
+20 125 0.5 0.5 mutant
+Marker Name vWA
+10 150 0.5 0.5 mutant
+11 154 0.5 0.5
+12 158 0.5 0.5
+13 162 0.5 0.5
+14 166 0.5 0.5
+15 170 0.5 0.5
+15.2 172 0.5 0.5 mutant
+16 174 0.5 0.5
+17 178 0.5 0.5
+18 182 0.5 0.5
+18.2 184 0.5 0.5 mutant
+19 186 0.5 0.5
+20 190 0.5 0.5
+21 194 0.5 0.5
+22 198 0.5 0.5
+23 202 0.5 0.5
+24 206 0.5 0.5
+25 210 0.5 0.5 mutant
+Marker Name D16S539
+5 228.5 0.5 0.5
+6 232.5 0.5 0.5 mutant
+7 236.5 0.5 0.5 mutant
+8 240.5 0.5 0.5
+9 244.5 0.5 0.5
+10 248.5 0.5 0.5
+11 252.5 0.5 0.5
+12 256.5 0.5 0.5
+12.2 258.5 0.5 0.5 mutant
+13 260.5 0.5 0.5
+14 264.5 0.5 0.5
+15 268.5 0.5 0.5
+16 272.5 0.5 0.5 mutant
+Marker Name D2S1338
+14 285.5 0.5 0.5 mutant
+15 289.5 0.5 0.5
+16 293.5 0.5 0.5
+17 297.5 0.5 0.5
+18 301.5 0.5 0.5
+19 305.5 0.5 0.5
+20 309.5 0.5 0.5
+21 313.5 0.5 0.5
+22 317.5 0.5 0.5
+23 321.5 0.5 0.5
+24 325.5 0.5 0.5
+25 329.5 0.5 0.5
+26 333.5 0.5 0.5
+27 337.5 0.5 0.5
+28 341.5 0.5 0.5
+29 345.5 0.5 0.5 mutant
+Marker Name AMEL
+X 101.4 0.5 0.5
+Y 107.4 0.5 0.5
+Marker Name D8S1179
+7 121 0.5 0.5 mutant
+8 125 0.5 0.5
+9 129 0.5 0.5
+10 133 0.5 0.5
+11 137 0.5 0.5
+12 141 0.5 0.5
+13 145 0.5 0.5
+14 149 0.5 0.5
+15 153 0.5 0.5
+16 157 0.5 0.5
+17 161 0.5 0.5
+18 165 0.5 0.5
+19 169 0.5 0.5
+20 173 0.5 0.5 mutant
+Marker Name D21S11
+23.2 183 0.5 0.5 mutant
+24 185 0.5 0.5
+24.2 187 0.5 0.5
+25 189 0.5 0.5
+25.2 191 0.5 0.5 mutant
+26 193 0.5 0.5
+26.2 195 0.5 0.5 mutant
+27 197 0.5 0.5
+27.2 199 0.5 0.5 mutant
+28 201 0.5 0.5
+28.2 203 0.5 0.5
+29 205 0.5 0.5
+29.2 207 0.5 0.5
+30 209 0.5 0.5
+30.2 211 0.5 0.5
+31 213 0.5 0.5
+31.2 215 0.5 0.5
+32 217 0.5 0.5
+32.2 219 0.5 0.5
+33 221 0.5 0.5
+33.2 223 0.5 0.5
+34 225 0.5 0.5
+34.2 227 0.5 0.5
+35 229 0.5 0.5
+35.2 231 0.5 0.5
+36 233 0.5 0.5
+36.2 235 0.5 0.5 mutant
+37 237 0.5 0.5
+37.2 239 0.5 0.5 mutant
+38 241 0.5 0.5
+38.2 243 0.5 0.5 mutant
+39 245 0.5 0.5 mutant
+Marker Name D18S51
+7 263 0.5 0.5
+8 267 0.5 0.5 mutant
+9 271 0.5 0.5
+9.2 273 0.5 0.5 mutant
+10 275 0.5 0.5
+10.2 277 0.5 0.5
+11 279 0.5 0.5
+11.2 281 0.5 0.5 mutant
+12 283 0.5 0.5
+12.2 285 0.5 0.5 mutant
+13 287 0.5 0.5
+13.2 289 0.5 0.5
+14 291 0.5 0.5
+14.2 293 0.5 0.5
+15 295 0.5 0.5
+15.2 297 0.5 0.5 mutant
+16 299 0.5 0.5
+16.2 301 0.5 0.5 mutant
+17 303 0.5 0.5
+17.2 305 0.5 0.5 mutant
+18 307 0.5 0.5
+18.2 309 0.5 0.5 mutant
+19 311 0.5 0.5
+19.2 313 0.5 0.5 mutant
+20 315 0.5 0.5
+20.2 317 0.5 0.5 mutant
+21 319 0.5 0.5
+21.2 321 0.5 0.5 mutant
+22 323 0.5 0.5
+22.2 325 0.5 0.5 mutant
+23 327 0.5 0.5
+23.2 329 0.5 0.5 mutant
+24 331 0.5 0.5
+25 335 0.5 0.5
+26 339 0.5 0.5
+27 343 0.5 0.5
+Marker Name D22
+8 80 0.5 0.5
+9 83 0.5 0.5
+10 86 0.5 0.5
+11 89 0.5 0.5
+12 92 0.5 0.5
+13 95 0.5 0.5
+14 98 0.5 0.5
+15 101 0.5 0.5
+16 104 0.5 0.5
+17 107 0.5 0.5
+18 110 0.5 0.5
+19 113 0.5 0.5
+Marker Name D19S433
+9 127.3 0.5 0.5
+9.2 129.3 0.5 0.5 mutant
+10 131.3 0.5 0.5
+10.2 133.3 0.5 0.5 mutant
+11 135.3 0.5 0.5
+11.2 137.3 0.5 0.5 mutant
+12 139.3 0.5 0.5
+12.2 141.3 0.5 0.5
+13 143.3 0.5 0.5
+13.2 145.3 0.5 0.5
+14 147.3 0.5 0.5
+14.2 149.3 0.5 0.5
+15 151.3 0.5 0.5
+15.2 153.3 0.5 0.5
+16 155.3 0.5 0.5
+16.2 157.3 0.5 0.5
+17 159.3 0.5 0.5
+17.2 161.3 0.5 0.5
+18 163.3 0.5 0.5 mutant
+18.2 165.3 0.5 0.5 mutant
+Marker Name TH01
+3 177 0.5 0.5 mutant
+4 181 0.5 0.5
+5 185 0.5 0.5
+5.3 188 0.5 0.49 mutant
+6 189 0.49 0.5
+6.3 192 0.5 0.49 mutant
+7 193 0.49 0.5
+7.3 196 0.5 0.49 mutant
+8 197 0.49 0.5
+8.3 200 0.5 0.49 mutant
+9 201 0.49 0.5
+9.3 204 0.5 0.49
+10 205 0.49 0.5
+10.3 208 0.5 0.49 mutant
+11 209 0.49 0.5
+12 213 0.5 0.5 mutant
+13 217 0.5 0.5 mutant
+13.3 220 0.5 0.5
+Marker Name FGA
+16 226 0.5 0.5 mutant
+16.2 228 0.5 0.5 mutant
+17 230 0.5 0.5
+17.2 232 0.5 0.5 mutant
+18 234 0.5 0.5
+18.2 236 0.5 0.5 mutant
+19 238 0.5 0.5
+19.2 240 0.5 0.5 mutant
+20 242 0.5 0.5
+20.2 244 0.5 0.5 mutant
+21 246 0.5 0.5
+21.2 248 0.5 0.5 mutant
+22 250 0.5 0.5
+22.2 252 0.5 0.5 mutant
+23 254 0.5 0.5
+23.2 256 0.5 0.5 mutant
+24 258 0.5 0.5
+24.2 260 0.5 0.5 mutant
+25 262 0.5 0.5
+25.2 264 0.5 0.5 mutant
+26 266 0.5 0.5
+26.2 268 0.5 0.5
+27 270 0.5 0.5
+27.2 272 0.5 0.5 mutant
+28 274 0.5 0.5
+28.2 276 0.5 0.5 mutant
+29 278 0.5 0.5
+29.2 280 0.5 0.5 mutant
+30 282 0.5 0.5
+30.2 284 0.5 0.5
+31 286 0.5 0.5 mutant
+31.2 288 0.5 0.5
+32 290 0.5 0.5 mutant
+32.2 292 0.5 0.5
+33.2 296 0.5 0.5
+34.2 300 0.5 0.5 mutant
+42.2 332 0.5 0.5
+43.2 336 0.5 0.5
+44.2 340 0.5 0.5
+45.2 344 0.5 0.5
+46.2 348 0.5 0.5
+47.2 352 0.5 0.5
+48.2 356 0.5 0.5
+49.2 360 0.5 0.5 mutant
+50.2 364 0.5 0.5
+51.2 368 0.5 0.5
+Marker Name D2S441
+9 80 0.5 0.5
+10 84 0.5 0.5
+11 88 0.5 0.5
+11.3 91 0.5 0.49
+12 92 0.49 0.5
+12.3 95 0.5 0.49 mutant
+13 96 0.49 0.5
+13.3 99 0.5 0.49 mutant
+14 100 0.49 0.5
+14.3 103 0.5 0.49 mutant
+15 104 0.49 0.5
+16 108 0.5 0.5
+17 112 0.5 0.5 mutant
+Marker Name D3S1358
+11 131 0.5 0.5 mutant
+12 135 0.5 0.5
+13 139 0.5 0.5
+14 143 0.5 0.5
+15 147 0.5 0.5
+15.2 149 0.5 0.5 mutant
+16 151 0.5 0.5
+16.2 153 0.5 0.5 mutant
+17 155 0.5 0.5
+17.2 157 0.5 0.5 mutant
+18 159 0.5 0.5
+18.2 161 0.5 0.5 mutant
+19 163 0.5 0.5
+20 167 0.5 0.5 mutant
+Marker Name D1S1656
+9 175 0.5 0.5
+10 179 0.5 0.5
+11 183 0.5 0.5
+12 187 0.5 0.5
+13 191 0.5 0.5
+14 195 0.5 0.5
+14.3 198 0.5 0.49
+15 199 0.49 0.5
+15.3 202 0.5 0.49
+16 203 0.49 0.5
+16.3 206 0.5 0.49
+17 207 0.49 0.5
+17.3 210 0.5 0.49
+18 211 0.49 0.5 mutant
+18.3 214 0.5 0.5
+19.3 218 0.5 0.5
+20.3 222 0.5 0.5
+Marker Name D12S391
+14 229 0.5 0.5
+15 233 0.5 0.5
+16 237 0.5 0.5
+17 241 0.5 0.5
+17.3 244 0.5 0.49 mutant
+18 245 0.49 0.5
+18.3 248 0.5 0.49 mutant
+19 249 0.49 0.5
+19.1 250 0.49 0.5 mutant
+19.3 252 0.5 0.5
+20 253 0.5 0.5
+20.3 256 0.5 0.49 mutant
+21 257 0.5 0.5
+22 261 0.5 0.5
+23 265 0.5 0.5
+24 269 0.5 0.5
+25 273 0.5 0.5
+26 277 0.5 0.5
+27 281 0.5 0.5
+Panel Name Prototype_PowerPlex_EP01
+Marker Name AMEL
+X 82.01 0.5 0.5
+Y 87.86 0.5 0.5
+Marker Name D3S1358
+8 94.5 0.5 0.5
+9 98.56 0.5 0.5
+10 102.62 0.5 0.5
+11 106.78 0.5 0.5
+12 110.84 0.5 0.5
+13 114.90 0.5 0.5
+14 119.00 0.5 0.5
+15 123.10 0.5 0.5
+15.2 125.15 0.5 0.5
+16 127.20 0.5 0.5
+17 131.31 0.5 0.5
+18 135.41 0.5 0.5
+19 139.50 0.5 0.5
+20 143.62 0.5 0.5
+21 147.72 0.5 0.5
+Marker Name TH01
+4 152.84 0.5 0.5
+5 156.83 0.5 0.5
+6 160.95 0.5 0.5
+7 164.90 0.5 0.5
+8 168.85 0.5 0.5
+9 172.79 0.5 0.5
+9.3 175.74 0.5 0.4
+10 176.78 0.5 0.5
+11 180.77 0.5 0.5
+13.3 191.77 0.5 0.5
+Marker Name D21S11
+23.2 196.86 0.5 0.5
+24 198.88 0.5 0.5
+24.2 200.91 0.5 0.5
+25 202.93 0.5 0.5
+25.2 204.96 0.5 0.5
+26 206.98 0.5 0.5
+26.2 209.01 0.5 0.5
+27 211.03 0.5 0.5
+27.2 213.06 0.5 0.5
+28 215.08 0.5 0.5
+28.2 217.09 0.5 0.5
+29 219.10 0.5 0.5
+29.2 221.12 0.5 0.5
+30 223.14 0.5 0.5
+30.2 225.16 0.5 0.5
+31 227.19 0.5 0.5
+31.2 229.20 0.5 0.5
+32 231.23 0.5 0.5
+32.2 233.25 0.5 0.5
+33 235.28 0.5 0.5
+33.2 237.29 0.5 0.5
+34 239.32 0.5 0.5
+34.2 241.34 0.5 0.5
+35 243.37 0.5 0.5
+35.2 245.38 0.5 0.5
+36 247.42 0.5 0.5
+36.2 249.45 0.5 0.5
+37 251.47 0.5 0.5
+37.2 253.50 0.5 0.5
+38 255.52 0.5 0.5
+38.2 257.64 0.5 0.5
+39 259.73 0.5 0.5
+Marker Name D18S51
+7 280.22 0.5 0.5
+8 284.13 0.5 0.5
+9 288.05 0.5 0.5
+9.2 290.00 0.5 0.5
+10 291.96 0.5 0.5
+10.2 293.92 0.5 0.5
+11 295.88 0.5 0.5
+12 299.79 0.5 0.5
+13 303.71 0.5 0.5
+13.2 305.67 0.5 0.5
+14 307.62 0.5 0.5
+14.2 309.49 0.5 0.5
+15 311.36 0.5 0.5
+16 315.27 0.5 0.5
+17 319.14 0.5 0.5
+18 323.03 0.5 0.5
+19 326.93 0.5 0.5
+19.2 328.98 0.5 0.5
+20 330.83 0.5 0.5
+21 334.73 0.5 0.5
+22 338.63 0.5 0.5
+23 342.53 0.5 0.5
+24 346.48 0.5 0.5
+25 350.43 0.5 0.5
+26 354.33 0.5 0.5
+27 358.23 0.5 0.5
+Marker Name D10S1248
+8 77.18 0.5 0.5
+9 81.25 0.5 0.5
+10 85.32 0.5 0.5
+11 89.39 0.5 0.5
+12 93.49 0.5 0.5
+13 97.55 0.5 0.5
+14 101.66 0.5 0.5
+15 105.81 0.5 0.5
+16 109.98 0.5 0.5
+17 114.10 0.5 0.5
+18 118.23 0.5 0.5
+19 122.36 0.5 0.5
+Marker Name D1S1656
+9 129.83 0.5 0.5
+10 133.95 0.5 0.5
+11 138.08 0.5 0.5
+12 142.16 0.5 0.5
+13 146.17 0.5 0.5
+13.3 149.23 0.5 0.4
+14 150.26 0.5 0.5
+14.3 153.35 0.5 0.4
+15 154.35 0.5 0.5
+15.3 157.37 0.5 0.4
+16 158.45 0.5 0.5
+16.3 161.46 0.5 0.4
+17 162.46 0.5 0.4
+17.1 163.48 0.5 0.5
+17.3 165.55 0.5 0.4
+18 166.47 0.5 0.5
+18.3 169.48 0.5 0.4
+19 170.56 0.5 0.5
+19.3 173.58 0.5 0.5
+20.3 177.60 0.5 0.5
+Marker Name D2S1338
+10 191.80 0.5 0.5
+11 195.77 0.5 0.5
+12 199.74 0.5 0.5
+13 203.71 0.5 0.5
+14 207.68 0.5 0.5
+15 211.65 0.5 0.5
+16 215.62 0.5 0.5
+17 219.59 0.5 0.5
+18 223.56 0.5 0.5
+19 227.53 0.5 0.5
+19.3 230.53 0.5 0.4
+20 231.60 0.5 0.5
+21 235.55 0.5 0.5
+22 239.49 0.5 0.5
+23 243.49 0.5 0.5
+23.2 245.50 0.5 0.4
+23.3 246.48 0.5 0.4
+24 247.48 0.5 0.5
+25 251.47 0.5 0.5
+26 255.46 0.5 0.5
+27 259.45 0.5 0.5
+28 263.36 0.5 0.5
+Marker Name D16S539
+4 269.30 0.5 0.5
+5 273.37 0.5 0.5
+6 277.41 0.5 0.5
+7 281.44 0.5 0.5
+8 285.31 0.5 0.5
+9 289.34 0.5 0.5
+10 293.38 0.5 0.5
+11 297.33 0.5 0.5
+12 301.30 0.5 0.5
+13 305.39 0.5 0.5
+14 309.40 0.5 0.5
+15 313.43 0.5 0.5
+16 317.55 0.5 0.5
+Marker Name D22S1045
+6 75.54 0.5 0.5
+7 78.52 0.5 0.5
+8 81.49 0.5 0.5
+9 84.45 0.5 0.5
+10 87.42 0.5 0.5
+11 90.41 0.5 0.5
+12 93.34 0.5 0.5
+13 96.36 0.5 0.5
+14 99.33 0.5 0.5
+15 102.41 0.5 0.5
+16 105.43 0.5 0.5
+17 108.46 0.5 0.5
+18 111.50 0.5 0.5
+19 114.56 0.5 0.5
+Marker Name vWA
+10 123.27 0.5 0.5
+11 127.31 0.5 0.5
+12 131.28 0.5 0.5
+13 135.41 0.5 0.5
+14 139.46 0.5 0.5
+15 143.47 0.5 0.5
+16 147.55 0.5 0.5
+17 151.57 0.5 0.5
+18 155.59 0.5 0.5
+19 159.61 0.5 0.5
+20 163.62 0.5 0.5
+21 167.55 0.5 0.5
+22 171.49 0.5 0.5
+23 175.42 0.5 0.5
+24 179.38 0.5 0.5
+25 183.36 0.5 0.5
+Marker Name D8S1179
+7 203.77 0.5 0.5
+8 207.78 0.5 0.5
+9 211.81 0.5 0.5
+10 215.77 0.5 0.5
+11 219.82 0.5 0.5
+12 223.80 0.5 0.5
+13 227.81 0.5 0.5
+14 231.84 0.5 0.5
+15 235.88 0.5 0.5
+16 239.92 0.5 0.5
+17 243.98 0.5 0.5
+18 248.13 0.5 0.5
+19 252.20 0.5 0.5
+20 256.27 0.5 0.5
+Marker Name FGA
+12.2 258.76 0.5 0.5
+16 272.13 0.5 0.5
+17 275.95 0.5 0.5
+17.2 277.86 0.5 0.5
+18 279.77 0.5 0.5
+18.2 281.68 0.5 0.5
+19 283.59 0.5 0.5
+19.2 285.50 0.5 0.5
+20 287.41 0.5 0.5
+20.2 289.32 0.5 0.5
+21 291.23 0.5 0.5
+21.2 293.14 0.5 0.5
+22 295.06 0.5 0.5
+22.2 296.97 0.5 0.5
+23 298.88 0.5 0.5
+23.2 300.80 0.5 0.5
+24 302.73 0.5 0.5
+24.2 304.67 0.5 0.5
+25 306.51 0.5 0.5
+25.2 308.46 0.5 0.5
+26 310.38 0.5 0.5
+26.2 312.36 0.5 0.5
+27 314.33 0.5 0.5
+27.2 316.24 0.5 0.5
+28 318.15 0.5 0.5
+28.2 320.11 0.5 0.5
+29 322.06 0.5 0.5
+29.2 323.99 0.5 0.5
+30 325.91 0.5 0.5
+30.2 327.84 0.5 0.5
+31 329.97 0.5 0.5
+31.2 332.09 0.5 0.5
+43.2 375.23 0.5 0.5
+44.2 379.10 0.5 0.5
+45.2 382.99 0.5 0.5
+46.2 386.97 0.5 0.5
+47.2 390.66 0.5 0.5
+50.3 402.65 0.5 0.5
+Marker Name D2S441
+8 88.29 0.5 0.5
+9 92.37 0.5 0.5
+10 96.44 0.5 0.5
+11 100.60 0.5 0.5
+11.3 103.62 0.5 0.4
+12 104.68 0.5 0.5
+12.3 107.78 0.5 0.4
+13 108.84 0.5 0.5
+13.3 111.95 0.5 0.4
+14 113.03 0.5 0.5
+14.3 116.10 0.5 0.4
+15 117.16 0.5 0.5
+16 121.28 0.5 0.5
+17 125.40 0.5 0.5
+Marker Name D12S391
+13 128.75 0.5 0.5
+14 132.88 0.5 0.5
+15 137.01 0.5 0.5
+16 140.92 0.5 0.5
+17 145.09 0.5 0.5
+17.3 148.02 0.5 0.4
+18 149.10 0.5 0.5
+18.3 152.11 0.5 0.5
+19 153.19 0.5 0.5
+19.3 156.15 0.5 0.5
+20 157.21 0.5 0.5
+21 161.23 0.5 0.5
+22 165.32 0.5 0.5
+23 169.48 0.5 0.5
+24 173.42 0.5 0.5
+25 177.45 0.5 0.5
+26 181.48 0.5 0.5
+27 185.45 0.5 0.5
+Marker Name D19S433
+5.2 193.33 0.5 0.5
+6.2 197.25 0.5 0.5
+7 199.22 0.5 0.5
+8 203.14 0.5 0.5
+9 207.07 0.5 0.5
+9.2 209.05 0.5 0.5
+10 211.02 0.5 0.5
+10.2 212.96 0.5 0.5
+11 214.90 0.5 0.5
+11.2 216.89 0.5 0.5
+12 218.87 0.5 0.5
+12.2 220.85 0.5 0.5
+13 222.85 0.5 0.5
+13.2 224.76 0.5 0.5
+14 226.77 0.5 0.5
+14.2 228.78 0.5 0.5
+15 230.71 0.5 0.5
+15.2 232.73 0.5 0.5
+16 234.67 0.5 0.5
+16.2 236.69 0.5 0.5
+17 238.71 0.5 0.5
+17.2 240.65 0.5 0.5
+18 242.68 0.5 0.5
+18.2 244.71 0.5 0.5
+19 246.61 0.5 0.5
+19.2 248.53 0.5 0.5
+20.2 252.69 0.5 0.5
+Panel Name Identifiler_v2
+Marker Name D8S1179
+8 128.0 0.5 0.5
+9 132.0 0.5 0.5
+10 136.0 0.5 0.5
+11 140.0 0.5 0.5
+12 144.0 0.5 0.5
+13 148.0 0.5 0.5
+14 152.0 0.5 0.5
+15 156.0 0.5 0.5
+16 160.0 0.5 0.5
+17 164.0 0.5 0.5
+18 168.0 0.5 0.5
+19 172.0 0.5 0.5
+Marker Name D21S11
+24 187.0 0.5 0.5
+24.2 189.0 0.5 0.5
+25 191.0 0.5 0.5
+25.2 193.0 0.5 0.5
+26 195.0 0.5 0.5
+26.2 197.0 0.5 0.5
+27 199.0 0.5 0.5
+27.2 201.0 0.5 0.5
+28 203.0 0.5 0.5
+28.2 205.0 0.5 0.5
+29 207.0 0.5 0.5
+29.2 209.0 0.5 0.5
+30 211.0 0.5 0.5
+30.2 213.0 0.5 0.5
+31 215.0 0.5 0.5
+31.2 217.0 0.5 0.5
+32 219.0 0.5 0.5
+32.2 221.0 0.5 0.5
+33 223.0 0.5 0.5
+33.2 225.0 0.5 0.5
+34 227.0 0.5 0.5
+34.2 229.0 0.5 0.5
+35 231.0 0.5 0.5
+35.2 233.0 0.5 0.5
+36 235.0 0.5 0.5
+36.2 237.0 0.5 0.5
+37 239.0 0.5 0.5
+37.2 241.0 0.5 0.5
+38 243.0 0.5 0.5
+Marker Name D7S820
+6 258.0 0.5 0.5
+7 262.0 0.5 0.5
+8 266.0 0.5 0.5
+8.2 268.0 0.5 0.5
+9 270.0 0.5 0.5
+9.2 272.0 0.5 0.5
+10 274.0 0.5 0.5
+11 278.0 0.5 0.5
+12 282.0 0.5 0.5
+13 286.0 0.5 0.5
+14 290.0 0.5 0.5
+15 294.0 0.5 0.5
+Marker Name CSF1PO
+6 306.63 0.5 0.5
+7 310.63 0.5 0.5
+8 314.63 0.5 0.5
+9 318.63 0.5 0.5
+10 322.63 0.5 0.5
+10.2 324.63 0.5 0.5
+11 326.63 0.5 0.5
+12 330.63 0.5 0.5
+13 334.63 0.5 0.5
+14 338.63 0.5 0.5
+15 342.63 0.5 0.5
+Marker Name D3S1358
+15.2 128.0 0.5 0.5
+16 130.0 0.5 0.5
+16.2 132.0 0.5 0.5
+17 134.0 0.5 0.5
+17.2 136.0 0.5 0.5
+18 138.0 0.5 0.5
+18.2 140.0 0.5 0.5
+19 142.0 0.5 0.5
+12 114.0 0.5 0.5
+13 118.0 0.5 0.5
+14 122.0 0.5 0.5
+15 126.0 0.5 0.5
+Marker Name TH01
+4 165.0 0.5 0.5
+5 169.0 0.5 0.5
+5.3 172.0 0.5 0.4
+6 173.0 0.4 0.5
+6.3 176.0 0.5 0.4
+7 177.0 0.4 0.5
+7.3 180.0 0.5 0.4
+8 181.0 0.4 0.5
+8.3 184.0 0.5 0.4
+9 185.0 0.4 0.5
+9.3 188.0 0.5 0.4
+10 189.0 0.4 0.5
+10.3 192.0 0.5 0.4
+11 193.0 0.4 0.5
+12 197.0 0.5 0.5
+13 201.0 0.5 0.5
+13.3 204.0 0.5 0.5
+Marker Name D13S317
+8 217.65 0.5 0.5
+9 221.65 0.5 0.5
+10 225.65 0.5 0.5
+11 229.65 0.5 0.5
+12 233.65 0.5 0.5
+13 237.65 0.5 0.5
+14 241.65 0.5 0.5
+15 245.65 0.5 0.5
+Marker Name D16S539
+5 257.3 0.5 0.5
+6 261.3 0.5 0.5
+7 265.3 0.5 0.5
+8 269.3 0.5 0.5
+9 273.3 0.5 0.5
+10 277.3 0.5 0.5
+11 281.3 0.5 0.5
+12 285.3 0.5 0.5
+12.2 287.3 0.5 0.5
+13 289.3 0.5 0.5
+14 293.3 0.5 0.5
+15 297.3 0.5 0.5
+Marker Name D2S1338
+15 309.31 0.5 0.5
+16 313.31 0.5 0.5
+17 317.31 0.5 0.5
+18 321.31 0.5 0.5
+19 325.31 0.5 0.5
+20 329.31 0.5 0.5
+21 333.31 0.5 0.5
+22 337.31 0.5 0.5
+23 341.31 0.5 0.5
+24 345.31 0.5 0.5
+25 349.31 0.5 0.5
+26 353.31 0.5 0.5
+27 357.31 0.5 0.5
+28 361.31 0.5 0.5
+Marker Name D19S433
+9 106.0 0.5 0.5
+9.2 108.0 0.5 0.5
+10 110.0 0.5 0.5
+10.2 112.0 0.5 0.5
+11 114.0 0.5 0.5
+11.2 116.0 0.5 0.5
+12 118.0 0.5 0.5
+12.2 120.0 0.5 0.5
+13 122.0 0.5 0.5
+13.2 124.0 0.5 0.5
+14 126.0 0.5 0.5
+14.2 128.0 0.5 0.5
+15 130.0 0.5 0.5
+15.2 132.0 0.5 0.5
+16 134.0 0.5 0.5
+16.2 136.0 0.5 0.5
+17 138.0 0.5 0.5
+17.2 140.0 0.5 0.5
+Marker Name vWA
+11 157.0 0.5 0.5
+12 161.0 0.5 0.5
+13 165.0 0.5 0.5
+14 169.0 0.5 0.5
+15 173.0 0.5 0.5
+15.2 175.0 0.5 0.5
+16 177.0 0.5 0.5
+17 181.0 0.5 0.5
+18 185.0 0.5 0.5
+18.2 187.0 0.5 0.5
+19 189.0 0.5 0.5
+20 193.0 0.5 0.5
+21 197.0 0.5 0.5
+22 201.0 0.5 0.5
+23 205.0 0.5 0.5
+24 209.0 0.5 0.5
+Marker Name TPOX
+6 224.99 0.5 0.5
+7 228.99 0.5 0.5
+8 232.99 0.5 0.5
+9 236.99 0.5 0.5
+10 240.99 0.5 0.5
+11 244.99 0.5 0.5
+12 248.99 0.5 0.5
+13 252.99 0.5 0.5
+Marker Name D18S51
+10.2 279.0 0.5 0.5
+11 281.0 0.5 0.5
+11.2 283.0 0.5 0.5
+12 285.0 0.5 0.5
+12.2 287.0 0.5 0.5
+13 289.0 0.5 0.5
+13.2 291.0 0.5 0.5
+14 293.0 0.5 0.5
+14.2 295.0 0.5 0.5
+15 297.0 0.5 0.5
+15.2 299.0 0.5 0.5
+16 301.0 0.5 0.5
+16.2 303.0 0.5 0.5
+17 305.0 0.5 0.5
+17.2 307.0 0.5 0.5
+18 309.0 0.5 0.5
+18.2 311.0 0.5 0.5
+19 313.0 0.5 0.5
+19.2 315.0 0.5 0.5
+20 317.0 0.5 0.5
+20.2 319.0 0.5 0.5
+21 321.0 0.5 0.5
+21.2 323.0 0.5 0.5
+22 325.0 0.5 0.5
+22.2 327.0 0.5 0.5
+23 329.0 0.5 0.5
+23.2 331.0 0.5 0.5
+24 333.0 0.5 0.5
+25 337.0 0.5 0.5
+26 341.0 0.5 0.5
+27 345.0 0.5 0.5
+7 265.0 0.5 0.5
+8 269.0 0.5 0.5
+9 273.0 0.5 0.5
+9.2 275.0 0.5 0.5
+10 277.0 0.5 0.5
+Marker Name AMEL
+X 107.0 0.5 0.5
+Y 113.0 0.5 0.5
+Marker Name D5S818
+7 135.0 0.5 0.5
+8 139.0 0.5 0.5
+9 143.0 0.5 0.5
+10 147.0 0.5 0.5
+11 151.0 0.5 0.5
+12 155.0 0.5 0.5
+13 159.0 0.5 0.5
+14 163.0 0.5 0.5
+15 167.0 0.5 0.5
+16 171.0 0.5 0.5
+Marker Name FGA
+17 215.0 0.5 0.5
+17.2 217.0 0.5 0.5
+18 219.0 0.5 0.5
+18.2 221.0 0.5 0.5
+19 223.0 0.5 0.5
+19.2 225.0 0.5 0.5
+20 227.0 0.5 0.5
+20.2 229.0 0.5 0.5
+21 231.0 0.5 0.5
+21.2 233.0 0.5 0.5
+22 235.0 0.5 0.5
+22.2 237.0 0.5 0.5
+23 239.0 0.5 0.5
+23.2 241.0 0.5 0.5
+24 243.0 0.5 0.5
+24.2 245.0 0.5 0.5
+25 247.0 0.5 0.5
+25.2 249.0 0.5 0.5
+26 251.0 0.5 0.5
+26.2 253.0 0.5 0.5
+27 255.0 0.5 0.5
+27.2 257.0 0.5 0.5
+28 259.0 0.5 0.5
+28.2 261.0 0.5 0.5
+29 263.0 0.5 0.5
+29.2 265.0 0.5 0.5
+30 267.0 0.5 0.5
+30.2 269.0 0.5 0.5
+31 271.0 0.5 0.5
+31.2 273.0 0.5 0.5
+32 275.0 0.5 0.5
+32.2 277.0 0.5 0.5
+33.2 281.0 0.5 0.5
+42.2 317.0 0.5 0.5
+43.2 321.0 0.5 0.5
+44.2 325.0 0.5 0.5
+45.2 329.0 0.5 0.5
+46.2 333.0 0.5 0.5
+47.2 337.0 0.5 0.5
+48.2 341.0 0.5 0.5
+49.2 345.0 0.5 0.5
+50.2 349.0 0.5 0.5
+51.2 353.0 0.5 0.5
diff --git a/inst/abif/NGM_Pa.txt b/inst/abif/NGM_Pa.txt
new file mode 100644
index 0000000..16c803b
--- /dev/null
+++ b/inst/abif/NGM_Pa.txt
@@ -0,0 +1,215 @@
+Version GM v 3.0
+Kit type Microsatellite
+Chemistry Kit AmpFLSTR_Panels_v1 Promega_Panels_v1 NextGen
+Panel Blue_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
+Panel Green_I_v1
+AMEL green 106.00 114.00 x 9 0.060 none X, Y,
+TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
+TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel Profiler_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
+AMEL green 106.00 114.00 x 9 0.000 none X, Y,
+TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
+TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D5S818 yellow 128.00 180.00 11 4 0.100 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+D13S317 yellow 192.00 242.00 11 4 0.100 none 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel Profiler_Plus_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
+AMEL green 106.00 114.00 x 9 0.000 none X, Y,
+D8S1179 green 118.00 183.50 13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 green 184.50 247.50 30 4 0.130 none 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 38,
+D18S51 green 264.49 350.00 15,19 4 0.160 none 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+D5S818 yellow 128.00 180.00 11 4 0.100 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+D13S317 yellow 192.00 242.00 11 4 0.100 none 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel COfiler_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+D16S539 blue 229.00 279.00 11,12 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+AMEL green 106.00 114.00 x 9 0.000 none X, Y,
+TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
+TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel SGM_Plus_v1
+D3S1358 blue 98.00 148.00 15,16 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 213.50 14,16 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+D16S539 blue 229.00 279.00 9,10 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+D2S1338 blue 284.00 354.00 20,23 4 0.150 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+AMEL green 106.00 114.00 x,y 9 0.000 none X, Y,
+D8S1179 green 118.00 183.50 12,13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 green 184.50 247.50 28,31 4 0.130 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D18S51 green 264.49 350.00 12,15 4 0.160 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+D19S433 yellow 101.00 148.00 14,15 4 0.170 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
+TH01 yellow 159.00 205.00 7,9.3 4 0.060 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+FGA yellow 206.25 360.00 24,26 4 0.110 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
+Panel Identifiler_v1
+D8S1179 blue 118.00 183.50 13 4 0.082 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 blue 184.50 247.50 30 4 0.094 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D7S820 blue 251.00 298.50 10,11 4 0.082 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+CSF1PO blue 302.12 348.63 10,12 4 0.092 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D3S1358 green 98.00 148.00 14,15 4 0.107 none 12, 13, 14, 15, 16, 17, 18, 19,
+TH01 green 159.00 205.00 8,9.3 4 0.051 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+D13S317 green 205.65 250.16 11 4 0.080 none 8, 9, 10, 11, 12, 13, 14, 15,
+D16S539 green 255.30 301.81 11,12 4 0.104 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+D2S1338 green 304.80 370.31 19,23 4 0.111 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
+vWA yellow 151.00 213.50 17,18 4 0.126 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+TPOX yellow 216.99 260.99 8 4 0.048 none 6, 7, 8, 9, 10, 11, 12, 13,
+D18S51 yellow 264.49 350.00 15,19 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+AMEL red 106.00 114.00 x 9 0.000 none X, Y,
+D5S818 red 128.00 180.00 11 4 0.068 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+FGA red 206.25 360.00 23,24 4 0.147 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
+Panel SEfiler_v1
+D3S1358 blue 98.00 148.00 15,16 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 213.50 14,16 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+D16S539 blue 229.00 279.00 9,10 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+D2S1338 blue 284.00 350.00 20,23 4 0.150 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+AMEL green 106.00 114.00 x,Y 9 0.000 none X, Y"
+D8S1179 green 118.00 183.50 12,13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+SE33 green 190.00 350.00 17,25.2 4 0.120 none 4.2, 6.3, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20.2, 21, 21.1, 21.2, 22.2, 23.2, 24.2, 25.2, 26.2, 27.2, 28.2, 29.2, 30.2, 31.2, 32.2, 33.2, 34.2, 35, 35.2, 36, 37,
+D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
+TH01 yellow 159.00 205.00 7,9.3 4 0.060 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+FGA yellow 206.25 360.00 24,26 4 0.110 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
+D21S11 red 184.50 247.50 28,31 4 0.130 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D18S51 red 264.49 350.00 12,15 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+Panel Profiler_Plus_CODIS_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+vWA blue 151.00 203.00 17,18 4 0.110 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+FGA blue 206.25 360.00 23,24 4 0.110 none 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30,
+AMEL green 106.00 114.00 x 9 0.000 none X, Y,
+D8S1179 green 118.00 183.50 13 4 0.120 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 green 184.50 247.50 30 4 0.130 none 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 38,
+D18S51 green 264.49 350.00 15,19 4 0.160 none 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+D5S818 yellow 128.00 180.00 11 4 0.010 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+D13S317 yellow 192.00 242.00 11 4 0.010 none 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel COfiler_CODIS_v1
+D3S1358 blue 98.00 148.00 14,15 4 0.110 none 12, 13, 14, 15, 16, 17, 18, 19,
+D16S539 blue 229.00 279.00 11,12 4 0.130 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+AMEL green 106.00 114.00 x 9 0.000 none X, Y,
+TH01 green 159.00 205.00 8,9.3 4 0.060 none 5, 6, 7, 8, 9, 9.3, 10,
+TPOX green 212.00 254.00 8 4 0.060 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO green 275.00 323.00 10,12 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 yellow 251.00 298.50 10,11 4 0.090 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel Identifiler_CODIS_v1
+D8S1179 blue 118.00 183.50 13 4 0.082 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 blue 184.50 247.50 30 4 0.094 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D7S820 blue 251.00 298.50 10,11 4 0.082 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+CSF1PO blue 302.12 348.63 10,12 4 0.092 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D3S1358 green 98.00 148.00 14,15 4 0.107 none 12, 13, 14, 15, 16, 17, 18, 19,
+TH01 green 159.00 205.00 8,9.3 4 0.051 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+D13S317 green 205.65 250.16 11 4 0.080 none 8, 9, 10, 11, 12, 13, 14, 15,
+D16S539 green 255.30 301.81 11,12 4 0.104 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+D2S1338 green 304.80 370.31 19,23 4 0.111 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+D19S433 yellow 101.00 148.00 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
+vWA yellow 151.00 213.50 17,18 4 0.126 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+TPOX yellow 216.99 260.99 8 4 0.048 none 6, 7, 8, 9, 10, 11, 12, 13,
+D18S51 yellow 264.49 350.00 15,19 4 0.170 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+AMEL red 106.00 114.00 x 9 0.000 none X, Y,
+D5S818 red 128.00 180.00 11 4 0.068 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+FGA red 206.25 360.00 23,24 4 0.147 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
+Panel PowerPlex_16_v1 null
+D3S1358 blue 94.0 151.0 14,15 4 0.13 none 12, 13, 14, 15, 16, 17, 18, 19, 20,
+TH01 blue 152.0 195.0 8,9.3 4 0.06 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+D21S11 blue 196.0 262.0 30 4 0.22 none 24, 24.2, 25, 25.2, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D18S51 blue 279.0 364.0 15,19 4 0.13 none 8, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+Penta_E blue 370.0 480.0 12,13 5 0.13 none 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+D5S818 green 112.0 158.0 11 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+D13S317 green 166.0 208.0 11 4 0.12 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 green 210.0 252.0 10,11 4 0.1 none 6, 7, 8, 9, 10, 11, 12, 13, 14,
+D16S539 green 260.0 310.0 11,12 4 0.13 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+CSF1PO green 312.0 361.0 10,12 4 0.1 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Penta_D green 365.0 449.0 12 5 0.06 none 2.2, 3.2, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+AMEL yellow 102.0 112.0 x 9 0.0 none X, Y,
+vWA yellow 120.0 184.0 17,18 4 0.14 none 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+D8S1179 yellow 200.0 252.0 13 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+TPOX yellow 255.0 303.0 8 4 0.06 none 6, 7, 8, 9, 10, 11, 12, 13,
+FGA yellow 305.0 466.0 23,24 4 0.14 none 16, 17, 18, 18.2, 19, 19.2, 20, 20.2, 21, 21.2, 22, 22.2, 23, 23.2, 24, 24.2, 25, 25.2, 26, 27, 28, 29, 30, 31.2, 43.2, 44.2, 45.2, 46.2,
+Panel PowerPlex_12_v1 null
+D5S818 blue 107.0 152.0 11,12 4 0.13 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D13S317 blue 159.0 206.0 8 4 0.13 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D7S820 blue 207.0 249.0 9,11 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13, 14,
+D16S539 blue 253.0 309.0 11,12 4 0.13 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+vWA yellow 118.0 170.0 16 4 0.13 none 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+TH01 yellow 172.0 204.0 9.3 4 0.13 none 5, 6, 7, 8, 9, 10, 11,
+AMEL yellow 208.0 217.0 x 9 0.0 none X, Y,
+TPOX yellow 218.0 258.0 8,9 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO yellow 283.0 335.0 9,10 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel Penta_E_v1 null
+Penta_E blue 370.0 480.0 12,13 5 0.13 none 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+Panel Penta_D_v1 null
+Penta_D green 365.0 449.0 12 5 0.06 none 2.2, 3.2, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+Panel SE33_v1 null
+SE33 green 194.0 334.0 19,29.2 4 0.14 none 4.2, 6.3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20.2, 21, 21.2, 22, 22.2, 23.2, 24.2, 25.2, 26.2, 27.2, 28.2, 29.2, 30.2, 31.2, 32.2, 33.2, 34.2, 35, 36, 37,
+Panel FFFL_v1 null
+LPL blue 92.0 130.0 10,12 4 0.09 none 7, 9, 10, 11, 12, 13, 14,
+F13B blue 159.0 193.0 10 4 0.04 none 6, 7, 8, 9, 10, 11,
+FESFPS blue 213.0 251.0 10,12 4 0.07 none 7, 8, 9, 10, 11, 12, 13, 14,
+F13A01 blue 270.0 336.0 4,5 4 0.04 none 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16,
+Panel CTTV_v1 null
+vWA blue 126.0 168.0 16 4 0.14 none 13, 14, 15, 16, 17, 18, 19, 20,
+TH01 blue 169.0 203.5 9.3 4 0.06 none 5, 6, 7, 8, 9, 10, 11,
+AMEL blue 205.0 218.0 x 9 0.0 none X, Y,
+TPOX blue 219.0 253.0 8,9 4 0.06 none 6, 7, 8, 9, 10, 11, 12, 13,
+CSF1PO blue 282.0 327.0 9,10 4 0.1 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
+Panel NextGen_GS500
+D10 blue 72.0 127.0 "12,15" 4 0.11 none "8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, "
+vWA blue 149.0 214.3 "14,16" 4 0.126 none "11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, "
+D16S539 blue 223.6 277.6 "9,10" 4 0.104 none "5, 8, 9, 10, 11, 12, 13, 14, 15, "
+D2S1338 blue 281.6 356.0 "20,23" 4 0.111 none "15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, "
+AMEL green 98.0 109.2 "x,y" 9 0 none "X, Y, "
+D8S1179 green 117.9 174.9 "12,13" 4 0.082 none "8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, "
+D21S11 green 178.8 249.8 "28,31" 4 0.094 none "24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38, "
+D18S51 green 259.5 347.5 "12,15" 4 0.17 none "7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, "
+D22 yellow 74.0 120.0 "11,16" 3 0.15 none "8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19"
+D19S433 yellow 122.3 166.3 "14,15" 4 0.133 none "9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2, "
+TH01 yellow 176.4 221.1 "7,9.3" 4 0.051 none "4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3, "
+FGA yellow 221.6 372.0 "24,26" 4 0.147 none "17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2, "
+D2S441 red 74.5 113.4 "14,15" 4 0.06 none "9, 10, 11, 11.3, 12, 13, 14, 15, 16, "
+D3S1358 red 114.4 168.4 "15,16" 4 0.107 none "12, 13, 14, 15, 16, 17, 18, 19, "
+D1S1656 red 170.0 224.0 "13,16" 4 0.12 none "9, 10, 11, 12, 13, 14, 14.3, 15, 15.3, 16, 16.3, 17, 17.3, 18.3, 19.3, 20.3, "
+D12S391 red 225 287.0 "18,19" 4 0.10 none "14, 15, 16, 17, 18, 19, 19.3, 20, 21, 22, 23, 24, 25, 26, 27, "
+Panel Prototype_PowerPlex_EP01 null
+AMEL blue 75.00 92.00 X, X 9 0.0 none X, Y
+D3S1358 blue 92.01 150.00 14,15 4 0.13 none 12, 13, 14, 15, 16, 17, 18, 19, 20,
+TH01 blue 150.01 195.00 8,9.3 4 0.06 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+D21S11 blue 195.01 270.00 30 4 0.22 none 24, 24.2, 25, 25.2, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D18S51 blue 270.01 370.00 15,19 4 0.13 none 8, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+D10S1248 green 70.00 127.00 13,15 4 0.12 none 8, 10, 11, 12, 13, 14, 15, 16, 17, 19
+D1S1656 green 127.01 180.00 18.3,18.3 4 0.12 none 9, 10, 11, 12, 13, 14, 14.3, 15, 15.3, 16, 16.3, 17, 17.3, 18, 18.3, 19, 19.3, 20.3
+D2S1338 green 180.01 265.00 19,23 4 0.11 none 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
+D16S539 green 265.01 330.00 11,12 4 0.13 none 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+D22S1045 yellow 70.00 118.00 11,14 3 0.20 none 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
+vWA yellow 118.01 200.00 17,18 4 0.14 none 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+D8S1179 yellow 200.01 257.50 13 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+FGA yellow 257.51 420.00 23,24 4 0.14 none 16, 17, 18, 18.2, 19, 19.2, 20, 20.2, 21, 21.2, 22, 22.2, 23, 23.2, 24, 24.2, 25, 25.2, 26, 27, 28, 29, 30, 31.2, 43.2, 44.2, 45.2, 46.2,
+D2S441 red 80.0 127.00 10,14 4 0.08 none 8, 10, 11, 11.3, 12, 13, 14, 15, 17,
+D12S391 red 127.01 188.00 18,20 4 0.15 none 14, 15, 16, 17, 17.3, 18, 18.3, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+D19S433 red 188.01 262.00 14,15 4 0.14 none 5.2, 6.2, 8, 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2, 18, 18.2, 20.2,
+Panel Identifiler_v2 null
+D8S1179 blue 90.0 184.5 13 4 0.082 none 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+D21S11 blue 184.0 250.5 30 4 0.094 none 24, 24.2, 25, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
+D7S820 blue 250.0 300.5 10,11 4 0.082 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+CSF1PO blue 300.0 400.0 10,12 4 0.092 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+D3S1358 green 90.0 155.5 14,15 4 0.107 none 12, 13, 14, 15, 16, 17, 18, 19,
+TH01 green 155.0 206.0 8,9.3 4 0.051 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
+D13S317 green 204.0 252.5 11 4 0.08 none 8, 9, 10, 11, 12, 13, 14, 15,
+D16S539 green 252.0 303.5 11,12 4 0.104 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
+D2S1338 green 301.0 400.0 19,23 4 0.111 none 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+D19S433 yellow 90.0 151.0 14,15 4 0.133 none 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2,
+vWA yellow 148.5 215.5 17,18 4 0.126 none 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+TPOX yellow 215.0 262.5 8 4 0.048 none 6, 7, 8, 9, 10, 11, 12, 13,
+D18S51 yellow 262.0 400.0 15,19 4 0.17 none 7, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 14.2, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+AMEL red 106.0 114.5 x 9 0.0 none X, Y,
+D5S818 red 114.0 190.5 11 4 0.068 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+FGA red 188.0 400.0 23,24 4 0.147 none 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26.2, 27, 28, 29, 30, 30.2, 31.2, 32.2, 33.2, 42.2, 43.2, 44.2, 45.2, 46.2, 47.2, 48.2, 50.2, 51.2,
diff --git a/inst/abif/Promega_Bins_v1.txt b/inst/abif/Promega_Bins_v1.txt
new file mode 100644
index 0000000..240df31
--- /dev/null
+++ b/inst/abif/Promega_Bins_v1.txt
@@ -0,0 +1 @@
+Version GM v 3.0
Chemistry Kit Promega_Panels_v1
BinSet Name Promega_Bins_ID3.2.0
Panel Name PowerPlex_16_v1
Marker Name D3S1358
18 136.0 0.5 0.5
19 140.0 0.5 0.5
20 144.0 0.5 0.5
21 148.0 0.5 0.5
11 108.0 0.5 0.5
12 112.0 0.5 0.5
13 116.0 0.5 0.5
14 120.0 0.5 0.5
15 124.0 0.5 0.5
15.2 126.0 0.5 0.5
16 128.0 0.5 0.5
17 132.0 0.5 0.5
Marker Name TH01
4 154.0 0.5 0.5
5 158.0 0.5 0.5
6 162.0 0.5 0.5
7 166.0 0.5 0.5
8 170.0 0.5 0.5
9 174.0 0.5 0.5
9.3 177.0 0.5 0.4
10 178.0 0.5 0.5
11 182.0 0.5 0.5
13.3 193.0 0.5 0.5
Marker Name D21S11
23.2 198.0 0.5 0.5
24 200.0 0.5 0.5
24.2 202.0 0.5 0.5
25 204.0 0.5 0.5
25.2 206.0 0.5 0.5
26 208.0 0.5 0.5
26.2 210.0 0.5 0.5
27 212.0 0.5 0.5
27.2 214.0 0.5 0.5
28 216.0 0.5 0.5
28.2 218.0 0.5 0.5
29 220.0 0.5 0.5
29.2 222.0 0.5 0.5
30 224.0 0.5 0.5
30.2 226.0 0.5 0.5
31 228.0 0.5 0.5
31.2 230.0 0.5 0.5
32 232.0 0.5 0.5
32.2 234.0 0.5 0.5
33 236.0 0.5 0.5
33.2 238.0 0.5 0.5
34 240.0 0.5 0.5
34.2 242.0 0.5 0.5
35 244.0 0.5 0.5
35.2 246.0 0.5 0.5
36 248.0 0.5 0.5
36.2 250.0 0.5 0.5
37 252.0 0.5 0.5
37.2 254.0 0.5 0.5
38 256.0 0.5 0.5
38.2 258.0 0.5 0.5
39 260.0 0.5 0.5
Marker Name D18S51
8 285.0 0.5 0.5
9 289.0 0.5 0.5
9.2 291.0 0.5 0.5
10 293.0 0.5 0.5
10.2 295.0 0.5 0.5
11 297.0 0.5 0.5
12 301.0 0.5 0.5
13 305.0 0.5 0.5
13.2 307.0 0.5 0.5
14 309.0 0.5 0.5
14.2 311.0 0.5 0.5
15 313.0 0.5 0.5
16 317.0 0.5 0.5
17 321.0 0.5 0.5
18 325.0 0.5 0.5
19 329.0 0.5 0.5
19.2 331.0 0.5 0.5
20 333.0 0.5 0.5
21 337.0 0.5 0.5
22 341.0 0.5 0.5
23 345.0 0.5 0.5
24 349.0 0.5 0.5
25 353.0 0.5 0.5
26 357.0 0.5 0.5
27 361.0 0.5 0.5
Marker Name Penta_E
7 387.0 0.5 0.5
8 392.0 0.5 0.5
9 397.0 0.5 0.5
10 402.0 0.5 0.5
11 407.0 0.5 0.5
12 412.0 0.5 0.5
13 417.0 0.5 0.5
14 422.0 0.5 0.5
15 427.0 0.5 0.5
16 432.0 0.5 0.5
17 437.0 0.5 0.5
18 442.0 0.5 0.5
19 447.0 0.5 0.5
20 452.0 0.5 0.5
21 457.0 0.5 0.5
22 462.0 0.5 0.5
23 467.0 0.5 0.5
24 472.0 0.5 0.5
25 477.0 0.5 0.5
4 372.0 0.5 0.5
5 377.0 0.5 0.5
6 382.0 0.5 0.5
Marker Name D5S818
7 115.0 0.5 0.5
8 119.0 0.5 0.5
9 123.0 0.5 0.5
10 127.0 0.5 0.5
11 131.0 0.5 0.5
12 135.0 0.5 0.5
13 139.0 0.5 0.5
14 143.0 0.5 0.5
15 147.0 0.5 0.5
16 151.0 0.5 0.5
Marker Name D13S317
7 173.0 0.5 0.5
8 177.0 0.5 0.5
9 181.0 0.5 0.5
10 185.0 0.5 0.5
11 189.0 0.5 0.5
12 193.0 0.5 0.5
13 197.0 0.5 0.5
14 201.0 0.5 0.5
15 205.0 0.5 0.5
Marker Name D7S820
6 213.0 0.5 0.5
7 217.0 0.5 0.5
8 221.0 0.5 0.5
9 225.0 0.5 0.5
10 229.0 0.5 0.5
11 233.0 0.5 0.5
12 237.0 0.5 0.5
13 241.0 0.5 0.5
14 245.0 0.5 0.5
Marker Name D16S539
5 263.0 0.5 0.5
8 275.0 0.5 0.5
9 279.0 0.5 0.5
10 283.0 0.5 0.5
11 287.0 0.5 0.5
12 291.0 0.5 0.5
13 295.0 0.5 0.5
14 299.0 0.5 0.5
15 303.0 0.5 0.5
Marker Name CSF1PO
6 318.0 0.5 0.5
7 322.0 0.5 0.5
8 326.0 0.5 0.5
9 330.0 0.5 0.5
10 334.0 0.5 0.5
11 338.0 0.5 0.5
12 342.0 0.5 0.5
13 346.0 0.5 0.5
14 350.0 0.5 0.5
15 354.0 0.5 0.5
Marker Name Penta_D
2.2 369.0 0.5 0.5
3.2 374.0 0.5 0.5
5 382.0 0.5 0.5
7 392.0 0.5 0.5
8 397.0 0.5 0.5
9 402.0 0.5 0.5
10 407.0 0.5 0.5
11 412.0 0.5 0.5
12 417.0 0.5 0.5
13 422.0 0.5 0.5
14 427.0 0.5 0.5
15 432.0 0.5 0.5
16 437.0 0.5 0.5
17 442.0 0.5 0.5
Marker Name AMEL
X 104.0 0.5 0.5
Y 110.0 0.5 0.5
Marker Name vWA
22 170.0 0.5 0.5
10 122.0 0.5 0.5
11 126.0 0.5 0.5
12 130.0 0.5 0.5
13 134.0 0.5 0.5
14 138.0 0.5 0.5
15 142.0 0.5 0.5
15.2 144.0 0.5 0.5
16 146.0 0.5 0.5
17 150.0 0.5 0.5
18 154.0 0.5 0.5
19 158.0 0.5 0.5
20 162.0 0.5 0.5
21 166.0 0.5 0.5
Marker Name D8S1179
7 202.0 0.5 0.5
8 206.0 0.5 0.5
9 210.0 0.5 0.5
10 214.0 0.5 0.5
11 218.0 0.5 0.5
12 222.0 0.5 0.5
13 226.0 0.5 0.5
14 230.0 0.5 0.5
15 234.0 0.5 0.5
16 238.0 0.5 0.5
17 242.0 0.5 0.5
18 246.0 0.5 0.5
19 250.0 0.5 0.5
Marker Name TPOX
5 257.0 0.5 0.5
6 261.0 0.5 0.5
7 265.0 0.5 0.5
8 269.0 0.5 0.5
9 273.0 0.5 0.5
10 277.0 0.5 0.5
11 281.0 0.5 0.5
12 285.0 0.5 0.5
13 289.0 0.5 0.5
14 293.0 0.5 0.5
Marker Name FGA
16 320.0 0.5 0.5
17 324.0 0.5 0.5
17.2 326.0 0.5 0.5
18 328.0 0.5 0.5
18.2 330.0 0.5 0.5
19 332.0 0.5 0.5
19.2 334.0 0.5 0.5
20 336.0 0.5 0.5
20.2 338.0 0.5 0.5
21 340.0 0.5 0.5
21.2 342.0 0.5 0.5
22 344.0 0.5 0.5
22.2 346.0 0.5 0.5
23 348.0 0.5 0.5
23.2 350.0 0.5 0.5
24 352.0 0.5 0.5
24.2 354.0 0.5 0.5
25 356.0 0.5 0.5
25.2 358.0 0.5 0.5
26 360.0 0.5 0.5
26.2 362.0 0.5 0.5
27 364.0 0.5 0.5
27.2 366.0 0.5 0.5
28 368.0 0.5 0.5
28.2 370.0 0.5 0.5
29 372.0 0.5 0.5
29.2 374.0 0.5 0.5
30 376.0 0.5 0.5
30.2 378.0 0.5 0.5
31 380.0 0.5 0.5
31.2 382.0 0.5 0.5
43.2 431.0 0.5 0.5
44.2 435.0 0.5 0.5
45.2 439.0 0.5 0.5
46.2 443.0 0.5 0.5
47.2 447.0 0.5 0.5
Panel Name PowerPlex_12_v1
Marker Name D5S818
7 114.0 0.5 0.5
8 118.0 0.5 0.5
9 122.0 0.5 0.5
10 126.0 0.5 0.5
11 130.0 0.5 0.5
12 134.0 0.5 0.5
13 138.0 0.5 0.5
14 142.0 0.5 0.5
15 146.0 0.5 0.5
16 150.0 0.5 0.5
Marker Name D13S317
7 170.0 0.5 0.5
8 174.0 0.5 0.5
9 178.0 0.5 0.5
10 182.0 0.5 0.5
11 186.0 0.5 0.5
12 190.0 0.5 0.5
13 194.0 0.5 0.5
14 198.0 0.5 0.5
15 202.0 0.5 0.5
Marker Name D7S820
6 213.0 0.5 0.5
7 217.0 0.5 0.5
8 221.0 0.5 0.5
9 225.0 0.5 0.5
10 229.0 0.5 0.5
11 233.0 0.5 0.5
12 237.0 0.5 0.5
13 241.0 0.5 0.5
14 245.0 0.5 0.5
Marker Name D16S539
5 262.0 0.5 0.5
6 266.0 0.5 0.5
7 270.0 0.5 0.5
8 274.0 0.5 0.5
9 278.0 0.5 0.5
10 282.0 0.5 0.5
11 286.0 0.5 0.5
12 290.0 0.5 0.5
13 294.0 0.5 0.5
14 298.0 0.5 0.5
15 302.0 0.5 0.5
Marker Name vWA
10 122.0 0.5 0.5
11 126.0 0.5 0.5
12 130.0 0.5 0.5
13 134.0 0.5 0.5
14 138.0 0.5 0.5
15 142.0 0.5 0.5
15.2 144.0 0.5 0.5
16 146.0 0.5 0.5
17 150.0 0.5 0.5
18 154.0 0.5 0.5
19 158.0 0.5 0.5
20 162.0 0.5 0.5
21 166.0 0.5 0.5
Marker Name TH01
5 176.0 0.5 0.5
6 180.0 0.5 0.5
7 184.0 0.5 0.5
8 188.0 0.5 0.5
9 192.0 0.5 0.5
9.3 195.0 0.5 0.4
10 196.0 0.5 0.5
11 200.0 0.5 0.5
Marker Name AMEL
X 210.0 0.5 0.5
Y 216.0 0.5 0.5
Marker Name TPOX
6 222.0 0.5 0.5
7 226.0 0.5 0.5
8 230.0 0.5 0.5
9 234.0 0.5 0.5
10 238.0 0.5 0.5
11 242.0 0.5 0.5
12 246.0 0.5 0.5
13 250.0 0.5 0.5
14 254.0 0.5 0.5
Marker Name CSF1PO
6 291.0 0.5 0.5
7 295.0 0.5 0.5
8 299.0 0.5 0.5
9 303.0 0.5 0.5
10 307.0 0.5 0.5
11 311.0 0.5 0.5
12 315.0 0.5 0.5
13 319.0 0.5 0.5
14 323.0 0.5 0.5
15 327.0 0.5 0.5
Panel Name Penta_E_v1
Marker Name Penta_E
4 372.0 0.5 0.5
5 377.0 0.5 0.5
6 382.0 0.5 0.5
7 387.0 0.5 0.5
8 392.0 0.5 0.5
9 397.0 0.5 0.5
10 402.0 0.5 0.5
11 407.0 0.5 0.5
12 412.0 0.5 0.5
13 417.0 0.5 0.5
14 422.0 0.5 0.5
15 427.0 0.5 0.5
16 432.0 0.5 0.5
17 437.0 0.5 0.5
18 442.0 0.5 0.5
19 447.0 0.5 0.5
20 452.0 0.5 0.5
21 457.0 0.5 0.5
22 462.0 0.5 0.5
23 467.0 0.5 0.5
24 472.0 0.5 0.5
25 477.0 0.5 0.5
Panel Name Penta_D_v1
Marker Name Penta_D
9 402.0 0.5 0.5
10 407.0 0.5 0.5
11 412.0 0.5 0.5
12 417.0 0.5 0.5
13 422.0 0.5 0.5
14 427.0 0.5 0.5
15 432.0 0.5 0.5
16 437.0 0.5 0.5
17 442.0 0.5 0.5
2.2 369.0 0.5 0.5
3.2 374.0 0.5 0.5
5 382.0 0.5 0.5
7 392.0 0.5 0.5
8 397.0 0.5 0.5
Panel Name SE33_v1
Marker Name SE33
4.2 196.0 0.5 0.5
6.3 205.0 0.5 0.5
7.2 208.0 0.5 0.5
8 210.0 0.5 0.5
8.2 212.0 0.5 0.5
9 214.0 0.5 0.5
9.2 216.0 0.5 0.5
10 218.0 0.5 0.5
10.2 220.0 0.5 0.5
11 222.0 0.5 0.5
11.2 224.0 0.5 0.5
12 226.0 0.5 0.5
12.2 228.0 0.5 0.5
13 230.0 0.5 0.5
13.2 232.0 0.5 0.5
14 234.0 0.5 0.5
14.2 236.0 0.5 0.4
14.3 237.0 0.4 0.4
15 238.0 0.4 0.5
15.2 240.0 0.5 0.5
16 242.0 0.5 0.5
16.2 244.0 0.5 0.5
17 246.0 0.5 0.5
17.2 248.0 0.5 0.5
18 250.0 0.5 0.5
18.2 252.0 0.5 0.5
19 254.0 0.5 0.5
19.2 256.0 0.5 0.5
20 258.0 0.5 0.5
20.2 260.0 0.5 0.5
21 262.0 0.5 0.4
21.1 263.0 0.4 0.4
21.2 264.0 0.4 0.5
22 266.0 0.5 0.5
22.2 268.0 0.5 0.5
23 270.0 0.5 0.5
23.2 272.0 0.5 0.5
24 274.0 0.5 0.5
24.2 276.0 0.5 0.5
25 278.0 0.5 0.5
25.2 280.0 0.5 0.5
26 282.0 0.5 0.5
26.2 284.0 0.5 0.5
27 286.0 0.5 0.5
27.2 288.0 0.5 0.5
28 290.0 0.5 0.5
28.2 292.0 0.5 0.5
29 294.0 0.5 0.5
29.2 296.0 0.5 0.5
30 298.0 0.5 0.5
30.2 300.0 0.5 0.5
31 302.0 0.5 0.5
31.2 304.0 0.5 0.5
32 306.0 0.5 0.4
32.1 307.0 0.4 0.4
32.2 308.0 0.4 0.5
33 310.0 0.5 0.5
33.2 312.0 0.5 0.5
34 314.0 0.5 0.5
34.2 316.0 0.5 0.5
35 318.0 0.5 0.5
35.2 320.0 0.5 0.5
36 322.0 0.5 0.5
36.2 324.0 0.5 0.5
37 326.0 0.5 0.5
37.2 328.0 0.5 0.5
38 330.0 0.5 0.5
Panel Name FFFL_v1
Marker Name LPL
7 97.7 0.5 0.5
8 101.7 0.5 0.5
9 105.7 0.5 0.5
10 109.7 0.5 0.5
11 113.7 0.5 0.5
12 117.7 0.5 0.5
13 121.7 0.5 0.5
14 125.7 0.5 0.5
Marker Name F13B
6 164.3 0.5 0.5
7 168.3 0.5 0.5
8 172.3 0.5 0.5
9 176.3 0.5 0.5
10 180.3 0.5 0.5
11 184.2 0.5 0.5
12 188.19 0.5 0.5
Marker Name FESFPS
7 218.0 0.5 0.5
8 222.0 0.5 0.5
9 226.0 0.5 0.5
10 230.0 0.5 0.5
11 234.0 0.5 0.5
12 238.0 0.5 0.5
13 242.0 0.5 0.5
14 246.04 0.5 0.5
Marker Name F13A01
3.2 276.68 0.5 0.5
4 278.7 0.5 0.5
5 282.7 0.5 0.5
6 286.7 0.5 0.5
7 290.7 0.5 0.5
8 294.7 0.5 0.5
9 298.7 0.5 0.5
10 302.27 0.5 0.5
11 305.7 0.5 0.5
12 309.4 0.5 0.5
13 313.3 0.5 0.5
14 317.2 0.5 0.5
15 321.2 0.5 0.5
16 325.1 0.5 0.5
Panel Name CTTV_v1
Marker Name vWA
13 139.0 0.5 0.5
14 143.0 0.5 0.5
15 147.0 0.5 0.5
16 151.0 0.5 0.5
17 155.0 0.5 0.5
18 159.0 0.5 0.5
19 163.0 0.5 0.5
20 167.0 0.5 0.5
Marker Name TH01
8 191.4 0.5 0.5
9 195.4 0.5 0.5
10 199.4 0.5 0.5
11 202.97 0.5 0.5
6 183.4 0.5 0.5
7 187.4 0.5 0.5
5 179.4 0.5 0.5
Marker Name AMEL
x 212.08 0.5 0.5
Y 217.38 0.5 0.5
Marker Name TPOX
6 224.0 0.5 0.5
7 228.0 0.5 0.5
8 232.0 0.5 0.5
9 236.0 0.5 0.5
10 240.0 0.5 0.5
11 244.0 0.5 0.5
12 248.0 0.5 0.5
13 252.03 0.5 0.5
Marker Name CSF1PO
7 294.5 0.5 0.5
8 298.5 0.5 0.5
9 302.5 0.5 0.5
10 306.5 0.5 0.5
11 310.5 0.5 0.5
12 314.5 0.5 0.5
13 318.5 0.5 0.5
14 322.5 0.5 0.5
15 326.32 0.5 0.5
\ No newline at end of file
diff --git a/inst/abif/Promega_Panels_v1.txt b/inst/abif/Promega_Panels_v1.txt
new file mode 100644
index 0000000..24f8096
--- /dev/null
+++ b/inst/abif/Promega_Panels_v1.txt
@@ -0,0 +1 @@
+Version GM v 3.0
Kit type: MICROSATELLITE
Chemistry Kit Promega_Panels_v1 none
Panel PowerPlex_16_v1 null
TPOX yellow 255.0 303.0 8 4 0.06 none 6, 7, 8, 9, 10, 11, 12, 13,
FGA yellow 305.0 466.0 23,24 4 0.14 none 16, 17, 18, 18.2, 19, 19.2, 20, 20.2, 21, 21.2, 22, 22.2, 23, 23.2, 24, 24.2, 25, 25.2, 26, 27, 28, 29, 30, 31.2, 43.2, 44.2, 45.2, 46.2,
D3S1358 blue 94.0 151.0 14,15 4 0.13 none 12, 13, 14, 15, 16, 17, 18, 19, 20,
TH01 blue 152.0 195.0 8,9.3 4 0.06 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
D21S11 blue 196.0 262.0 30 4 0.22 none 24, 24.2, 25, 25.2, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D18S51 blue 279.0 364.0 15,19 4 0.13 none 8, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
Penta_E blue 370.0 480.0 12,13 5 0.13 none 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
D5S818 green 112.0 158.0 11 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
D13S317 green 166.0 208.0 11 4 0.12 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 green 210.0 252.0 10,11 4 0.1 none 6, 7, 8, 9, 10, 11, 12, 13, 14,
D16S539 green 260.0 310.0 11,12 4 0.13 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
CSF1PO green 312.0 361.0 10,12 4 0.1 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Penta_D green 365.0 449.0 12 5 0.06 none 2.2, 3.2, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
AMEL yellow 102.0 112.0 x 9 0.0 none X, Y,
vWA yellow 120.0 184.0 17,18 4 0.14 none 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
D8S1179 yellow 200.0 252.0 13 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
Panel PowerPlex_12_v1 null
D5S818 blue 107.0 152.0 11,12 4 0.13 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
D13S317 blue 159.0 206.0 8 4 0.13 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
D7S820 blue 207.0 249.0 9,11 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13, 14,
D16S539 blue 253.0 309.0 11,12 4 0.13 none 5, 8, 9, 10, 11, 12, 13, 14, 15,
vWA yellow 118.0 170.0 16 4 0.13 none 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
TH01 yellow 172.0 204.0 9.3 4 0.13 none 5, 6, 7, 8, 9, 10, 11,
AMEL yellow 208.0 217.0 x 9 0.0 none X, Y,
TPOX yellow 218.0 258.0 8,9 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO yellow 283.0 335.0 9,10 4 0.13 none 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
Panel Penta_E_v1 null
Penta_E blue 370.0 480.0 12,13 5 0.13 none 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
Panel Penta_D_v1 null
Penta_D green 365.0 449.0 12 5 0.06 none 2.2, 3.2, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
Panel SE33_v1 null
SE33 green 194.0 334.0 19,29.2 4 0.14 none 4.2, 6.3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20.2, 21, 21.2, 22, 22.2, 23.2, 24.2, 25.2, 26.2, 27.2, 28.2, 29.2, 30.2, 31.2, 32.2, 33.2, 34.2, 35, 36, 37,
Panel FFFL_v1 null
LPL blue 92.0 130.0 10,12 4 0.09 none 7, 9, 10, 11, 12, 13, 14,
F13B blue 159.0 193.0 10 4 0.04 none 6, 7, 8, 9, 10, 11,
FESFPS blue 213.0 251.0 10,12 4 0.07 none 7, 8, 9, 10, 11, 12, 13, 14,
F13A01 blue 270.0 336.0 4,5 4 0.04 none 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16,
Panel CTTV_v1 null
vWA blue 126.0 168.0 16 4 0.14 none 13, 14, 15, 16, 17, 18, 19, 20,
TH01 blue 169.0 203.5 9.3 4 0.06 none 5, 6, 7, 8, 9, 10, 11,
AMEL blue 205.0 218.0 x 9 0.0 none X, Y,
TPOX blue 219.0 253.0 8,9 4 0.06 none 6, 7, 8, 9, 10, 11, 12, 13,
CSF1PO blue 282.0 327.0 9,10 4 0.1 none 7, 8, 9, 10, 11, 12, 13, 14, 15,
\ No newline at end of file
diff --git a/inst/abif/Prototype_PowerPlex_EP01_Bins.txt b/inst/abif/Prototype_PowerPlex_EP01_Bins.txt
new file mode 100644
index 0000000..8975370
--- /dev/null
+++ b/inst/abif/Prototype_PowerPlex_EP01_Bins.txt
@@ -0,0 +1 @@
+#GeneMapper ID v3.2
Version GM v 3.0
Chemistry Kit Prototype_PowerPlex_EP01
BinSet Name Prototype_PowerPlex_EP01
Panel Name Prototype_PowerPlex_EP01
Marker Name AMEL
X 82.01 0.5 0.5
Y 87.86 0.5 0.5
Marker Name D3S1358
8 94.5 0.5 0.5
9 98.56 0.5 0.5
10 102.62 0.5 0.5
11 106.78 0.5 0.5
12 110.84 0.5 0.5
13 114.90 0.5 0.5
14 119.00 0.5 0.5
15 123.10 0.5 0.5
15.2 125.15 0.5 0.5
16 127.20 0.5 0.5
17 131.31 0.5 0.5
18 135.41 0.5 0.5
19 139.50 0.5 0.5
20 143.62 0.5 0.5
21 147.72 0.5 0.5
Marker Name TH01
4 152.84 0.5 0.5
5 156.83 0.5 0.5
6 160.95 0.5 0.5
7 164.90 0.5 0.5
8 168.85 0.5 0.5
9 172.79 0.5 0.5
9.3 175.74 0.5 0.4
10 176.78 0.5 0.5
11 180.77 0.5 0.5
13.3 191.77 0.5 0.5
Marker Name D21S11
23.2 196.86 0.5 0.5
24 198.88 0.5 0.5
24.2 200.91 0.5 0.5
25 202.93 0.5 0.5
25.2 204.96 0.5 0.5
26 206.98 0.5 0.5
26.2 209.01 0.5 0.5
27 211.03 0.5 0.5
27.2 213.06 0.5 0.5
28 215.08 0.5 0.5
28.2 217.09 0.5 0.5
29 219.10 0.5 0.5
29.2 221.12 0.5 0.5
30 223.14 0.5 0.5
30.2 225.16 0.5 0.5
31 227.19 0.5 0.5
31.2 229.20 0.5 0.5
32 231.23 0.5 0.5
32.2 233.25 0.5 0.5
33 235.28 0.5 0.5
33.2 237.29 0.5 0.5
34 239.32 0.5 0.5
34.2 241.34 0.5 0.5
35 243.37 0.5 0.5
35.2 245.38 0.5 0.5
36 247.42 0.5 0.5
36.2 249.45 0.5 0.5
37 251.47 0.5 0.5
37.2 253.50 0.5 0.5
38 255.52 0.5 0.5
38.2 257.64 0.5 0.5
39 259.73 0.5 0.5
Marker Name D18S51
7 280.22 0.5 0.5
8 284.13 0.5 0.5
9 288.05 0.5 0.5
9.2 290.00 0.5 0.5
10 291.96 0.5 0.5
10.2 293.92 0.5 0.5
11 295.88 0.5 0.5
12 299.79 0.5 0.5
13 303.71 0.5 0.5
13.2 305.67 0.5 0.5
14 307.62 0.5 0.5
14.2 309.49 0.5 0.5
15 311.36 0.5 0.5
16 315.27 0.5 0.5
17 319.14 0.5 0.5
18 323.03 0.5 0.5
19 326.93 0.5 0.5
19.2 328.98 0.5 0.5
20 330.83 0.5 0.5
21 334.73 0.5 0.5
22 338.63 0.5 0.5
23 342.53 0.5 0.5
24 346.48 0.5 0.5
25 350.43 0.5 0.5
26 354.33 0.5 0.5
27 358.23 0.5 0.5
Marker Name D10S1248
8 77.18 0.5 0.5
9 81.25 0.5 0.5
10 85.32 0.5 0.5
11 89.39 0.5 0.5
12 93.49 0.5 0.5
13 97.55 0.5 0.5
14 101.66 0.5 0.5
15 105.81 0.5 0.5
16 109.98 0.5 0.5
17 114.10 0.5 0.5
18 118.23 0.5 0.5
19 122.36 0.5 0.5
Marker Name D1S1656
9 129.83 0.5 0.5
10 133.95 0.5 0.5
11 138.08 0.5 0.5
12 142.16 0.5 0.5
13 146.17 0.5 0.5
13.3 149.23 0.5 0.4
14 150.26 0.5 0.5
14.3 153.35 0.5 0.4
15 154.35 0.5 0.5
15.3 157.37 0.5 0.4
16 158.45 0.5 0.5
16.3 161.46 0.5 0.4
17 162.46 0.5 0.4
17.1 163.48 0.5 0.5
17.3 165.55 0.5 0.4
18 166.47 0.5 0.5
18.3 169.48 0.5 0.4
19 170.56 0.5 0.5
19.3 173.58 0.5 0.5
20.3 177.60 0.5 0.5
Marker Name D2S1338
10 191.80 0.5 0.5
11 195.77 0.5 0.5
12 199.74 0.5 0.5
13 203.71 0.5 0.5
14 207.68 0.5 0.5
15 211.65 0.5 0.5
16 215.62 0.5 0.5
17 219.59 0.5 0.5
18 223.56 0.5 0.5
19 227.53 0.5 0.5
19.3 230.53 0.5 0.4
20 231.60 0.5 0.5
21 235.55 0.5 0.5
22 239.49 0.5 0.5
23 243.49 0.5 0.5
23.2 245.50 0.5 0.4
23.3 246.48 0.5 0.4
24 247.48 0.5 0.5
25 251.47 0.5 0.5
26 255.46 0.5 0.5
27 259.45 0.5 0.5
28 263.36 0.5 0.5
Marker Name D16S539
4 269.30 0.5 0.5
5 273.37 0.5 0.5
6 277.41 0.5 0.5
7 281.44 0.5 0.5
8 285.31 0.5 0.5
9 289.34 0.5 0.5
10 293.38 0.5 0.5
11 297.33 0.5 0.5
12 301.30 0.5 0.5
13 305.39 0.5 0.5
14 309.40 0.5 0.5
15 313.43 0.5 0.5
16 317.55 0.5 0.5
Marker Name D22S1045
6 75.54 0.5 0.5
7 78.52 0.5 0.5
8 81.49 0.5 0.5
9 84.45 0.5 0.5
10 87.42 0.5 0.5
11 90.41 0.5 0.5
12 93.34 0.5 0.5
13 96.36 0.5 0.5
14 99.33 0.5 0.5
15 102.41 0.5 0.5
16 105.43 0.5 0.5
17 108.46 0.5 0.5
18 111.50 0.5 0.5
19 114.56 0.5 0.5
Marker Name vWA
10 123.27 0.5 0.5
11 127.31 0.5 0.5
12 131.28 0.5 0.5
13 135.41 0.5 0.5
14 139.46 0.5 0.5
15 143.47 0.5 0.5
16 147.55 0.5 0.5
17 151.57 0.5 0.5
18 155.59 0.5 0.5
19 159.61 0.5 0.5
20 163.62 0.5 0.5
21 167.55 0.5 0.5
22 171.49 0.5 0.5
23 175.42 0.5 0.5
24 179.38 0.5 0.5
25 183.36 0.5 0.5
Marker Name D8S1179
7 203.77 0.5 0.5
8 207.78 0.5 0.5
9 211.81 0.5 0.5
10 215.77 0.5 0.5
11 219.82 0.5 0.5
12 223.80 0.5 0.5
13 227.81 0.5 0.5
14 231.84 0.5 0.5
15 235.88 0.5 0.5
16 239.92 0.5 0.5
17 243.98 0.5 0.5
18 248.13 0.5 0.5
19 252.20 0.5 0.5
20 256.27 0.5 0.5
Marker Name FGA
12.2 258.76 0.5 0.5
16 272.13 0.5 0.5
17 275.95 0.5 0.5
17.2 277.86 0.5 0.5
18 279.77 0.5 0.5
18.2 281.68 0.5 0.5
19 283.59 0.5 0.5
19.2 285.50 0.5 0.5
20 287.41 0.5 0.5
20.2 289.32 0.5 0.5
21 291.23 0.5 0.5
21.2 293.14 0.5 0.5
22 295.06 0.5 0.5
22.2 296.97 0.5 0.5
23 298.88 0.5 0.5
23.2 300.80 0.5 0.5
24 302.73 0.5 0.5
24.2 304.67 0.5 0.5
25 306.51 0.5 0.5
25.2 308.46 0.5 0.5
26 310.38 0.5 0.5
26.2 312.36 0.5 0.5
27 314.33 0.5 0.5
27.2 316.24 0.5 0.5
28 318.15 0.5 0.5
28.2 320.11 0.5 0.5
29 322.06 0.5 0.5
29.2 323.99 0.5 0.5
30 325.91 0.5 0.5
30.2 327.84 0.5 0.5
31 329.97 0.5 0.5
31.2 332.09 0.5 0.5
43.2 375.23 0.5 0.5
44.2 379.10 0.5 0.5
45.2 382.99 0.5 0.5
46.2 386.97 0.5 0.5
47.2 390.66 0.5 0.5
50.3 402.65 0.5 0.5
Marker Name D2S441
8 88.29 0.5 0.5
9 92.37 0.5 0.5
10 96.44 0.5 0.5
11 100.60 0.5 0.5
11.3 103.62 0.5 0.4
12 104.68 0.5 0.5
12.3 107.78 0.5 0.4
13 108.84 0.5 0.5
13.3 111.95 0.5 0.4
14 113.03 0.5 0.5
14.3 116.10 0.5 0.4
15 117.16 0.5 0.5
16 121.28 0.5 0.5
17 125.40 0.5 0.5
Marker Name D12S391
13 128.75 0.5 0.5
14 132.88 0.5 0.5
15 137.01 0.5 0.5
16 140.92 0.5 0.5
17 145.09 0.5 0.5
17.3 148.02 0.5 0.4
18 149.10 0.5 0.5
18.3 152.11 0.5 0.5
19 153.19 0.5 0.5
19.3 156.15 0.5 0.5
20 157.21 0.5 0.5
21 161.23 0.5 0.5
22 165.32 0.5 0.5
23 169.48 0.5 0.5
24 173.42 0.5 0.5
25 177.45 0.5 0.5
26 181.48 0.5 0.5
27 185.45 0.5 0.5
Marker Name D19S433
5.2 193.33 0.5 0.5
6.2 197.25 0.5 0.5
7 199.22 0.5 0.5
8 203.14 0.5 0.5
9 207.07 0.5 0.5
9.2 209.05 0.5 0.5
10 211.02 0.5 0.5
10.2 212.96 0.5 0.5
11 214.90 0.5 0.5
11.2 216.89 0.5 0.5
12 218.87 0.5 0.5
12.2 220.85 0.5 0.5
13 222.85 0.5 0.5
13.2 224.76 0.5 0.5
14 226.77 0.5 0.5
14.2 228.78 0.5 0.5
15 230.71 0.5 0.5
15.2 232.73 0.5 0.5
16 234.67 0.5 0.5
16.2 236.69 0.5 0.5
17 238.71 0.5 0.5
17.2 240.65 0.5 0.5
18 242.68 0.5 0.5
18.2 244.71 0.5 0.5
19 246.61 0.5 0.5
19.2 248.53 0.5 0.5
20.2 252.69 0.5 0.5
\ No newline at end of file
diff --git a/inst/abif/Prototype_PowerPlex_EP01_Pa.txt b/inst/abif/Prototype_PowerPlex_EP01_Pa.txt
new file mode 100644
index 0000000..762829a
--- /dev/null
+++ b/inst/abif/Prototype_PowerPlex_EP01_Pa.txt
@@ -0,0 +1 @@
+#GeneMapper ID v3.2
Version GM v 3.0
Kit type: MICROSATELLITE
Chemistry Kit Prototype_PowerPlex_EP01 none
Panel Prototype_PowerPlex_EP01 null
AMEL blue 75.00 92.00 X, X 9 0.0 none X, Y
D3S1358 blue 92.01 150.00 14,15 4 0.13 none 12, 13, 14, 15, 16, 17, 18, 19, 20,
TH01 blue 150.01 195.00 8,9.3 4 0.06 none 4, 5, 6, 7, 8, 9, 9.3, 10, 11, 13.3,
D21S11 blue 195.01 270.00 30 4 0.22 none 24, 24.2, 25, 25.2, 26, 27, 28, 28.2, 29, 29.2, 30, 30.2, 31, 31.2, 32, 32.2, 33, 33.2, 34, 34.2, 35, 35.2, 36, 37, 38,
D18S51 blue 270.01 370.00 15,19 4 0.13 none 8, 9, 10, 10.2, 11, 12, 13, 13.2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
D10S1248 green 70.00 127.00 13,15 4 0.12 none 8, 10, 11, 12, 13, 14, 15, 16, 17, 19
D1S1656 green 127.01 180.00 18.3,18.3 4 0.12 none 9, 10, 11, 12, 13, 14, 14.3, 15, 15.3, 16, 16.3, 17, 17.3, 18, 18.3, 19, 19.3, 20.3
D2S1338 green 180.01 265.00 19,23 4 0.11 none 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
D16S539 green 265.01 330.00 11,12 4 0.13 none 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
D22S1045 yellow 70.00 118.00 11,14 3 0.20 none 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
vWA yellow 118.01 200.00 17,18 4 0.14 none 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
D8S1179 yellow 200.01 257.50 13 4 0.11 none 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
FGA yellow 257.51 420.00 23,24 4 0.14 none 16, 17, 18, 18.2, 19, 19.2, 20, 20.2, 21, 21.2, 22, 22.2, 23, 23.2, 24, 24.2, 25, 25.2, 26, 27, 28, 29, 30, 31.2, 43.2, 44.2, 45.2, 46.2,
D2S441 red 80.0 127.00 10,14 4 0.08 none 8, 10, 11, 11.3, 12, 13, 14, 15, 17,
D12S391 red 127.01 188.00 18,20 4 0.15 none 14, 15, 16, 17, 17.3, 18, 18.3, 19, 20, 21, 22, 23, 24, 25, 26, 27,
D19S433 red 188.01 262.00 14,15 4 0.14 none 5.2, 6.2, 8, 9, 10, 11, 12, 12.2, 13, 13.2, 14, 14.2, 15, 15.2, 16, 16.2, 17, 17.2, 18, 18.2, 20.2,
\ No newline at end of file
diff --git a/inst/abif/samplefsa2ps.fsa b/inst/abif/samplefsa2ps.fsa
new file mode 100755
index 0000000..a07f015
Binary files /dev/null and b/inst/abif/samplefsa2ps.fsa differ
diff --git a/inst/sequences/Anouk.fasta b/inst/sequences/Anouk.fasta
new file mode 100644
index 0000000..c040e99
--- /dev/null
+++ b/inst/sequences/Anouk.fasta
@@ -0,0 +1,70 @@
+>LmjF01.0030
+ATGATGTCGGCCGAGCCGCCGTCGTCGCAGCCGTACATCAGCGACGTGCTGCGGCGGTAC
+CAGCTGGAGCGCTTTCAGTGTGCCTTTGCATCGAGCATGACCATCAAGGACCTCCTCGCC
+CTGCAGCCAGAGGACTTCAACCGCTACGGCGTCGTAGAGGCGATGGACATTTTGCGGCTG
+CGTGACGCCATCGAGTACATCAAGGCTAATCCGCTCCCCGCCTCGCGCTCTGGCAGTGAC
+GTGCTCGACAACGACGGCGACGGCGACGGCGACGACAGTACGCCGGAGGGGAAGGAGGGG
+TGCTCGACGGAGCGCCGGCGGCAGTACACAGCACGCGGAACCACAGTCCTTTGCCGGTCG
+ACCGACACCGCCGAGGAGGTGAAGCGCAAGAGCCGCATCCTCGTCGCCATTCGCAAGCGT
+CCGCTCAGCGCCGGGGAGCAGACGAACGGCTTCACGGACATCATGGACGCCGACAACAGC
+GGCGAGATTGTGCTGAAGGAGCCAAAGGTGAAGGTCGACCTCCGCAAGTACACCCACGTG
+CACCGCTTCTTCTTCGACGAGGTTTTCGACGAGGCCTGCGACAACGTCGACGTGTACAAC
+CGCGCTGCCCGCGCGCTGATCGACACCGTCTTCGACGGCGGCTGCGCGACATGCTTCGCC
+TATGGACAGACAGGGAGCGGCAAGACACACACGATGCTGGGCAAGGGCCCCGAGCCGGGC
+CTCTACGCACTCGCCGCCAAAGACATGTTTGACCGCCTCACGAGCGACACGCGCATCGTC
+GTTTCCTTTTACGAGATCTACAGCGGGAAGCTCTTTGACTTGCTGAACGGCCGGCGACCC
+CTGCGAGCCCTCGAGGACGACAAGGGCCGGGTGAACATCCGCGGCCTCACCGAACACTGC
+TCTACCAGCGTGGAGGACCTCATGACGATCATCGACCAGGGCAGCGGTGTTCGCAGCTGC
+GGCTCCACCGGCGCCAATGACACAAGCTCCCGCTCCCACGCCATTCTCGAGATCAAGCTC
+AAGGCGAAACGGACGTCGAAGCAGAGCGGCAAGTTCACGTTCATCGACCTCGCTGGAAGC
+GAGCGCGGCGCTGACACGGTGGACTGCGCGCGACAGACACGCCTCGAAGGGGCGGAGATC
+AACAAGAGCCTACTCGCGCTGAAGGAGTGCATTCGTTTTTTAGATCAGAACAGGAAGCAC
+GTCCCGTTCCGCGGCTCGAAGCTGACTGAGGTGCTCCGCGACTCGTTTATCGGCAACTGC
+CGCACGGTGATGATCGGCGCCGTCTCTCCGTCGAACAACAATGCCGAGCACACGCTGAAC
+ACGCTGCGCTACGCCGATCGTGTCAAGGAGCTGAAGCGCAACGCCACGGAGCGGCGCACT
+GTGTGCATGCCCGACGACCAGGAAGAGGCCTTCTTTGACACGACCGAGAGCAGGCCACCG
+TCGCGGAGGACGACAACTCGCCTTTCTACGGCCGCCCCGCTTTTCTCCGGCTCTTCGACG
+GCTGCGCCAGCACTTAGAAGCACGCTACTCAGCAGCCGCTCCGTCAACACACTCTCGCCG
+TCGTCGCAGGCCAAGTCGACTCTCGTCACCCCGAAGCCGCCGTCGCGCGATCGGACTCCG
+GACATGGTGTGCACTAAGCGGCCCCGCGACTCAGACAGAAGCGGCGAGGACGAAGTGGTA
+GCGCGGCCGAGTGGGCGCCCAAGCTTCAAGCGCTTCGAGAGCGGCGCCGAGCTTGTCGCG
+GCCCAGCGCAGTCGCGTCATTGACCAATACAACGCCTACCTCGAGACGGACATGAACTGT
+ATCAAGGAGGAGTACCAGGTGAAGTACGACGCAGAGCAGATGAACGCCAACACGCGCAGC
+TTTGTGGAGCGCGCACGTCTGCTGGTGAGCGAGAAACGGCGCGCGATGGAGTCCTTCCTA
+ACGCAGCTGGAGGAGCTCGACAAGATCGCGCAGCAGGTCGCCGACATCACCGCCTTTCAG
+CAGCACCTGCCGCCAACG
+>LinJ01.0030
+ATGATGTCGGCCGAGCCGCCGTCGTCGCAGCCGTACATCAGCGACGTGCTGCGGCGGTAC
+CAGCTGGAGCGCTTTCAGAGTTCCTTTGCATCGAGCATGACCATCAAGGACCTCCTCGCC
+CTGCAGCCGGAGGACTTCAACCGCTACGGCGTCGTAGAGGCAATGGACATTTTGCGGCTG
+CGCGACGCCATCGAGTACATCAAGGCCAACCCGCTCCCCGCCTCGCGCTCCGGCAGTGAC
+GTGCTCGACAACGACGGCGACGGCGACGGCGACGACAGTACGCCGGAGGGGAAGGAGGGG
+TGCTCGACGGAGCGCCGACGGCAGTACACAGCACGCGGAACCACCGTCCTTTGCGGGTCG
+ACCGACACCGCCGAGGAGGTGAAGCGCAAGAGCCGCATCATCGTCGCCATTCGCAAGCGT
+CCGCTCAGCGCCGGGGAGCAGACGAACGGCTTCACGGACATCATGGACGCCGACAACAAC
+GGCGAGATTGTGCTGAAGGAGCCAAAGGTGAAGGTCGACCTCCGCAAGTACACCCACGTG
+CACCGCTTCTTCTTCGACGAGGTTTTCGACGAGGCGTGCGACAACGTCGACGTGTACAAC
+CGCGCTGCCCGCGCGCTGATCGACACCGTCTTCGACGGCGGCTGCGCGACATGCTTCGCC
+TATGGGCAGACAGGGAGCGGCAAGACACACACGATGCTCGGCAAGGGCCCCGAGCCGGGC
+CTGTACGCACTCGCCGCCAAAGACATGTTTGACCGCCTCACGAGCGACACGCGCATCGTT
+GTTTCCTTTTACGAGATCTACAGCGGGAAGCTCTTTGACTTGCTGAACGGCCGGCGACCA
+CTGCGAGCCCTCGAGGACGACAAGGGGAGGGTGAACATCCGCGGCCTCACCGAACACTGC
+TCTACCAGCGTGGAGGACCTCATGACGATCATCGACCAGGGCAGCGGCGTTCGCAGCTGC
+GGCTCCACCGGCGCCAACGACACGAGCTCCCGCTCCCACGCCATTCTCGAGATCAAGCTC
+AAGGCGAAACGGACGTCGAAGCAGAGCGGCAAGTTCACATTCATCGACCTCGCTGGAAGC
+GAGCGCGGCGCCGACACGGTGGATTGCGCGCGACAGACACGCCTCGAAGGGGCGGAGATT
+AACAAGAGCCTACTCGCTCTGAAGGAGTGCATTCGTTTTTTAGATCAGAACAGGAAGCAC
+GTCCCGTTCCGCGGCTCGAAGCTGACTGAGGTGCTCCGCGACTCGTTTATCGGCAACTGC
+CGCACGGTGATGATCGGCGCCGTCTCTCCGTCCAACAACAATGCCGAGCACACGCTGAAC
+ACGTTGCGCTACGCCGATCGCGTCAAGGAGCTGAAGCGCAACGCCACGGAGCGGCGCACC
+GTGTGCGTGCCCAACGACCAGGAAGAGGCCTTCTTTGACACGACCGAGAGCAGGCCACCG
+TCGCGGAGGACGACAACTCGGCTTTCTGCGGCCGCCCCGCTTTTCTCCGGCACTTCGACG
+GCTGCCCCAGCATGTAAAAGCACGTTGCTCAGCAGCCGCTCCGTCAACACACTCTCGCCG
+TCGTCGCAGGGCAAGTCGACTCTCGTCACCCCGAAGCCACTGTCGCGCGATCGGACTCCG
+GACATGGTGTGCGCTAAGCGGCCCCGCGACTCAGACCGAAGCGGCGAAGACGAAGTGGTG
+GCGCGGCCGAGTGGGCGCCCAAGCTTCAAGCGCTTCGAGGGCGGCGCCGAGCTCGTGGCG
+GCCCAGCGCAGTCGTGTCATTGACCAATACAACGCCTACCTCGAGACGGACATGAACTGT
+ATCAAGGAGGAGTACCAGGTGAAGTACGACGCAGAGCAGATGAACGCCAACACGCGCACC
+TTTGTCGAGCGCGCACGCCTGCTGGTGAGCGAGAAGCGGCGCGCGATGGAGTCCTTCCTA
+ACGCAGCTGGACGAGCTCGATAAGATCGCGCAGCAGGTCGCCAGCATCACCGCCTTTCAG
+CAGCACCTGCCGCCAACG
diff --git a/inst/sequences/DarrenObbard.fasta b/inst/sequences/DarrenObbard.fasta
new file mode 100644
index 0000000..8d322d8
--- /dev/null
+++ b/inst/sequences/DarrenObbard.fasta
@@ -0,0 +1,6 @@
+>Reference
+ATGTGGTCGAGATATCGAAAGCTAGGGATATCGATTATATATAGCAAGATCGATAGAGGA
+TCGATGATCGATCGGGATCGACAGCTG
+>With out-of-frame gaps
+AT-TGGTCCAGGTATCGTAAGCTAGGGATATCGATTATATATAGCAAGATCGATAGGGGA
+TCGATGATCGATCGGGA--GACAGCTG
diff --git a/inst/sequences/ECOUNC.fsa b/inst/sequences/ECOUNC.fsa
new file mode 100755
index 0000000..9e212a2
--- /dev/null
+++ b/inst/sequences/ECOUNC.fsa
@@ -0,0 +1,133 @@
+>ECOUNC
+aaagcaaataaaatttaatttttatcaaaaaaatcataaaaaattgaccggttagactgt
+taacaacaaccaggttttctactgatataactggttacatttaacgccacgttcactctt
+ttgcatcaacaagataacgtggctttttttggtaagcagaaaataagtcattagtgaaaa
+tatcagtctgctaaaaatcggcgctaagaaccatcattggctgttaaaacattattaaaa
+atgtcaatgggtggtttttgttgtgtaaatgtcatttattaaaacagtatctgtttttag
+actgaaatatcataaacttgcaaaggcatcatttgccaagtaaataaatatgctgtgcgc
+gaacatgcgcaatatgtgatctgaagcacgctttatcaccagtgtttacgcgttatttac
+agtttttcatgatcgaacagggttagcagaaaagtcgcaattgtatgcactggaaaaata
+tttaaacatttattcaccttttggctacttattgtttgaaatcacgggggcgcaccgtat
+aatttgaccgctttttgatgcttgactctaagccttaaagaaagttttatacgacacgcg
+gcatacctcgaagggagcaggagtgaaaaacgtgatgtctgtgtcgctcgtgagtcgaaa
+cgttgctcggaagcttctgctcgttcagttactggtggtgatagcaagtggattgctgtt
+cagcctcaaagaccccttctggggcgtctctgcaataagcgggggcctggcagtctttct
+gcctaacgttttgtttatgatatttgcctggcgtcaccaggcgcatacaccagcgaaagg
+ccgggtggcctggacattcgcatttggcgaagctttcaaagttctggcgatgttggtgtt
+actggtggtggcgttggcggttttaaaggcggtattcttgccgctgatcgttacgtgggt
+tttggtgctggtggttcagatactggcaccggctgtaattaacaacaaagggtaaaaggc
+atcatggcttcagaaaatatgacgccgcaggattacataggacaccacctgaataacctt
+cagctggacctgcgtacattctcgctggtggatccacaaaaccccccagccaccttctgg
+acaatcaatattgactccatgttcttctcggtggtgctgggtctgttgttcctggtttta
+ttccgtagcgtagccaaaaaggcgaccagcggtgtgccaggtaagtttcagaccgcgatt
+gagctggtgatcggctttgttaatggtagcgtgaaagacatgtaccatggcaaaagcaag
+ctgattgctccgctggccctgacgatcttcgtctgggtattcctgatgaacctgatggat
+ttactgcctatcgacctgctgccgtacattgctgaacatgtactgggtctgcctgcactg
+cgtgtggttccgtctgcggacgtgaacgtaacgctgtctatggcactgggcgtatttatc
+ctgattctgttctacagcatcaaaatgaaaggcatcggcggcttcacgaaagagttgacg
+ctgcagccgttcaatcactgggcgttcattcctgtcaacttaatccttgaaggggtaagc
+ctgctgtccaaaccagtttcactcggtttgcgactgttcggtaacatgtatgccggtgag
+ctgattttcattctgattgctggtctgttgccgtggtggtcacagtggatcctgaatgtg
+ccgtgggccattttccacatcctgatcattacgctgcaagccttcatcttcatggttctg
+acgatcgtctatctgtcgatggcgtctgaagaacattaatttaccaacactactacgttt
+taactgaaacaaactggagactgtcatggaaaacctgaatatggatctgctgtacatggc
+tgccgctgtgatgatgggtctggcggcaatcggtgctgcgatcggtatcggcatcctcgg
+gggtaaattcctggaaggcgcagcgcgtcaacctgatctgattcctctgctgcgtactca
+gttctttatcgttatgggtctggtggatgctatcccgatgatcgctgtaggtctgggtct
+gtacgtgatgttcgctgtcgcgtagtaagcgttgcttttatttaaagagcaatatcagaa
+cgttaactaaatagaggcattgtgctgtgaatcttaacgcaacaatcctcggccaggcca
+tcgcgtttgtcctgttcgttctgttctgcatgaagtacgtatggccgccattaatggcag
+ccatcgaaaaacgtcaaaaagaaattgctgacggccttgcttccgcagaacgagcacata
+aggaccttgaccttgcaaaggccagcgcgaccgaccagctgaaaaaagcgaaagcggaag
+cccaggtaatcatcgagcaggcgaacaaacgccgctcgcagattctggacgaagcgaaag
+ctgaggcagaacaggaacgtactaaaatcgtggcccaggcgcaggcggaaattgaagccg
+agcgtaaacgtgcccgtgaagagctgcgtaagcaagttgctatcctggctgttgctggcg
+ccgagaagatcatcgaacgttccgtggatgaagctgctaacagcgacatcgtggataaac
+ttgtcgctgaactgtaaggagggaggggctgatgtctgaatttattacggtagctcgccc
+ctacgccaaagcagcttttgactttgccgtcgaacaccaaagtgtagaacgctggcagga
+catgctggcgtttgccgccgaggtaaccaaaaacgaacaaatggcagagcttctctctgg
+cgcgcttgcgccagaaacgctcgccgagtcgtttatcgcagtttgtggtgagcaactgga
+cgaaaacggtcagaacctgattcgggttatggctgaaaatggtcgtcttaacgcgctccc
+ggatgttctggagcagtttattcacctgcgtgccgtgagtgaggctaccgctgaggtaga
+cgtcatttccgctgccgcactgagtgaacaacagctcgcgaaaatttctgctgcgatgga
+aaaacgtctgtcacgcaaagttaagctgaattgcaaaatcgataagtctgtaatggcagg
+cgttatcatccgagcgggtgatatggtcattgatggcagcgtacgcggtcgtcttgagcg
+ccttgcagacgtcttgcagtcttaaggggactggagcatgcaactgaattccaccgaaat
+cagcgaactgatcaagcagcgcattgctcagttcaatgttgtgagtgaagctcacaacga
+aggtactattgtttctgtaagtgacggtgttatccgcattcacggcctggccgattgtat
+gcagggtgaaatgatctccctgccgggtaaccgttacgctatcgcactgaacctcgagcg
+cgactctgtaggtgcggttgttatgggtccgtacgctgaccttgccgaaggcatgaaagt
+taagtgtactggccgtatcctggaagttccggttggccgtggcctgctgggccgtgtggt
+taacactctgggtgcaccaatcgacggtaaaggtccgctggatcacgacggcttctctgc
+tgtagaagcaatcgctccgggcgttatcgaacgtcagtccgtagatcagccggtacagac
+cggttataaagccgttgactccatgatcccaatcggtcgtggtcagcgtgaattgatcat
+cggtgaccgtcagacaggtaaaaccgcactggctatcgatgccatcatcaaccagcgcga
+ttccggtatcaaatgtatctatgtcgctatcggccagaaagcgtccaccatttctaacgt
+ggtacgtaaactggaagagcacggcgcactggctaacaccatcgttgtggtagcaaccgc
+gtctgaatccgctgcactgcaatacctggcacgtatgccggttgcgctaatgggcgaata
+cttccgtgaccgcggtgaagatgcgctgatcatttacgatgacctgtctaaacaggctgt
+tgcttaccgtcagatctccctgctgctccgtcgtccgccaggacgtgaagcattcccggg
+cgacgttttctacctccactctcgtctgctggagcgtgctgcacgtgttaacgccgaata
+cgttgaagccttcaccaaaggtgaagtgaaagggaaaaccggttctctgaccgcactgcc
+gattatcgaaactcaggcgggtgacgtttctgcgttcgttccgaccaacgtaatctccat
+taccgatggtcagatcttcctggaaaccaacctgttcaacgccggtattcgtcctgcggt
+taacccgggtatttccgtatcccgtgttggtggtgcagcacagaccaagatcatgaaaaa
+actgtccggtggtatccgtaccgctctggcacagtatcgtgaactggcagcgttctctca
+gtttgcatccgaccttgacgatgcaacacgtaaccagcttgaccacggtcagaaagtgac
+cgaactgctgaaacagaaacagtatgcgccgatgtccgttgcgcagcagtctctggttct
+gttcgcagcagaacgtggttacctggcggatgttgaactgtcgaaaattggcagcttcga
+agccgctctgctggcttacgtcgaccgtgatcacgctccgttgatgcaagagatcaacca
+gaccggtggctacaacgacgaaatcgaaggcaagctgaaaggcatcctcgattccttcaa
+agcaacccaatcctggtaacgtctggcggcttgccttagggcaggccgcaaggcattgag
+gagaagctcatggccggcgcaaaagacatacgtagtaagatcgcaagcgtccagaacacg
+caaaagatcactaaagcgatggagatggtcgccgcttccaaaatgcgtaaatcgcaggat
+cgcatggcggccagccgtccttatgcagaaaccatgcgcaaagtgattggtcaccttgca
+cacggtaatctggaatataagcacccttacctggaagaccgcgacgttaaacgcgtgggc
+tacctggtggtgtcgaccgaccgtggtttgtgcggtggtttgaacattaacctgttcaaa
+aaactgctggcggaaatgaagacctggaccgacaaaggcgttcaatgcgacctcgcaatg
+atcggctcgaaaggcgtgtcgttcttcaactccgtgggcggcaatgttgttgcccaggtc
+accggcatgggggataacccttccctgtccgaactgatcggtccggtaaaagtgatgttg
+caggcctacgacgaaggccgtctggacaagctttacattgtcagcaacaaatttattaac
+accatgtctcaggttccgaccatcagccagctgctgccgttaccggcatcagatgatgat
+gatctgaaacataaatcctgggattacctgtacgaacccgatccgaaggcgttgctggat
+accctgctgcgtcgttatgtcgaatctcaggtttatcagggcgtggttgaaaacctggcc
+agcgagcaggccgcccgtatggtggcgatgaaagccgcgaccgacaatggcggcagcctg
+attaaagagctgcagttggtatacaacaaagctcgtcaggccagcattactcaggaactc
+accgagatcgtctcgggggccgccgcggtttaaacaggttatttcgtagaggatttaaga
+tggctactggaaagattgtccaggtaatcggcgccgtagttgacgtcgaattccctcagg
+atgccgtaccgcgcgtgtacgatgctcttgaggtgcaaaatggtaatgagcgtctggtgc
+tggaagttcagcagcagctcggcggcggtatcgtacgtaccatcgcaatgggttcctccg
+acggtctgcgtcgcggtctggatgtaaaagacctcgaacacccgattgaagtcccggtag
+gtaaagcgactctgggccgtatcatgaacgtactgggtgaaccggtcgacatgaaaggcg
+agatcggtgaagaagagcgttgggcgattcaccgcgcagcaccttcctacgaagagctgt
+caaactctcaggaactgctggaaaccggtatcaaagttatcgacctgatgtgtccgttcg
+ctaagggcggtaaagttggtctgttcggtggtgcgggtgtaggtaaaaccgtaaacatga
+tggagctcattcgtaacatcgcgatcgagcactccggttactctgtgtttgcgggcgtag
+gtgaacgtactcgtgagggtaacgacttctaccacgaaatgaccgactccaacgttatcg
+acaaagtatccctggtgtatggccagatgaacgagccgccgggaaaccgtctgcgcgttg
+ctctgaccggtctgaccatggctgagaaattccgtgacgaaggtcgtgacgttctgctgt
+tcgttgacaacatctatcgttacaccctggccggtacggaagtatccgcactgctgggcc
+gtatgccttcagcggtaggttatcagccgaccctggcggaagagatgggcgttctgcagg
+aacgtatcacctccaccaaaactggttctatcacctccgtacaggcagtatacgtacctg
+cggatgacttgactgacccgtctccggcaaccacctttgcgcaccttgacgcaaccgtgg
+tactgagccgtcagatcgcgtctctgggtatctacccggccgttgacccgctggactcca
+ccagccgtcagctggacccgctggtggttggtcaggaacactacgacaccgcgcgtggcg
+ttcagtccatcctgcaacgttatcaggaactgaaagacatcatcgccatcctgggtatgg
+atgaactgtctgaagaagacaaactggtggtagcgcgtgctcgtaagatccagcgcttcc
+tgtcccagccgttcttcgtggcagaagtattcaccggttctccgggtaaatacgtctccc
+tgaaagacaccatccgtggctttaaaggcatcatggaaggcgaatacgatcacctgccgg
+agcaggcgttctacatggtcggttccatcgaagaagctgtggaaaaagccaaaaaacttt
+aacgccttaatcggagggtgatatggcaatgacttaccacctggacgtcgtcagcgcaga
+gcaacaaatgttctctggtctggtcgagaaaatccaggtaacgggtagcgaaggtgaact
+ggggatctaccctggccacgcaccgctgctcaccgccattaagcctggtatgattcgcat
+cgtgaaacagcacggtcacgaagagtttatctatctgtctggcggcattcttgaagtgca
+gcctggcaacgtgaccgttctggccgacaccgcaattcgcggccaggatctcgacgaagc
+gcgagccatggaagcgaaacgtaaggctgaagagcacattagcagctctcacggcgacgt
+agattacgctcaggcgtctgcggaactggccaaagcgatcgcgcagctgcgcgttatcga
+gttgaccaaaaaagcgatgtaacaccggcttgaaaagcacaaaagccagtctggaaacag
+gctggcttttttttgcgcgtgtgacccgtcctgaatagcgttcacatagatcctgctgat
+ataaaacccccctgttttcctgtttattcattgatcgaaataagagcaaaaacatccacc
+tgacgcttaaattaaggtactgccttaattttctgcagacaaaaggcgtgacgatggtcg
+aaaatggcgctttcgtcagcggggataatccgttattgaacaatttatcctctgtccatt
+tcacgatgaaaaaaatgtagttttttcaaggtgaagcggtttaaattcgttctcaaatta
+cagtcaggacgcgtatgttga
diff --git a/inst/sequences/UBIQUITIN.mase b/inst/sequences/UBIQUITIN.mase
new file mode 100644
index 0000000..b04b609
--- /dev/null
+++ b/inst/sequences/UBIQUITIN.mase
@@ -0,0 +1,19 @@
+;;Ubiquitin, human and mouse sequences aligned by clustalw1.7 on Tue Sep 14 15:05:45 2004
+;empty description
+human_ubiquitin
+ATGCAGATCTTTGTGAAGACCCTCACTGGCAAAACCATCACCCTTGAGGTCGAGCCCAGT
+GACACCATTGAGAATGTCAAAGCCAAAATTCAAGACAAGGAGGGTATCCCACCTGACCAG
+CAGCGTCTGATATTTGCCGGCAAACAGCTGGAGGATGGCCGCACTCTCTCAGACTACAAC
+ATCCAGAAAGAGTCCACCCTGCACCTGGTGTTGCGCCTGCGAGGTGGCATTATTGAGCCT
+TCTCTCCGCCAGCTTGCCCAGAAATACAACTGCGACAAGATGATCTGCCGCAAGTGCTAT
+GCTCGCCTTCACCCTCGTGCTGTCAACTGCCGCAAGAAGAAGTGTGGTCACACCAACAAC
+CTGCGTCCCAAGAAGAAGGTCAAA
+;empty description
+mouse_ubiquitin
+ATGCAGATCTTCGTGAAGACCCTGACGGGCAAGACCATCACTCTTGAGGTCGAGCCCAGT
+GACACCATCGAGAATGTCAAGGCCAAGATCCAAGACAAGGAAGGCATCCCACCTGACCAG
+CAGAGGCTGATATTCGCGGGCAAACAGCTGGAGGATGGCCGCACCCTGTCCGACTACAAC
+ATCCAGAAAGAGTCCACCTTGCACCTGGTGCTGCGTCTGCGCGGTGGCATCATTGAGCCA
+TCCCTTCGTCAGCTTGCCCAGAAGTACAACTGTGACAAGATGATCTGCCGCAAGTGCTAC
+GCACGCCTGCACCCTCGTGCAGTCAACTGCCGCAAGAAGAAGTGCGGCCATACCAACAAC
+CTGCGCCCCAAGAAGAAGGTCAAA
diff --git a/inst/sequences/ame1.gbk b/inst/sequences/ame1.gbk
new file mode 100644
index 0000000..c0afaff
--- /dev/null
+++ b/inst/sequences/ame1.gbk
@@ -0,0 +1,581 @@
+LOCUS NW_001253191 395045 bp DNA linear CON 27-JUL-2006
+DEFINITION Apis mellifera linkage group 1 genomic contig, reference assembly
+ (based on Amel_4.0 Group1.32), whole genome shotgun sequence.
+ACCESSION NW_001253191
+VERSION NW_001253191.1 GI:110749203
+KEYWORDS WGS.
+SOURCE Apis mellifera (honey bee)
+ ORGANISM Apis mellifera
+ Eukaryota; Metazoa; Arthropoda; Hexapoda; Insecta; Pterygota;
+ Neoptera; Endopterygota; Hymenoptera; Apocrita; Aculeata; Apoidea;
+ Apidae; Apis.
+COMMENT GENOME ANNOTATION REFSEQ: Features on this sequence have been
+ produced for build 4 version 1 of the NCBI's genome annotation [see
+ documentation].
+ The DNA sequence is from the whole genome assembly released by the
+ Baylor College of Medicine Human Genome Sequencing Center as
+ Amel_4.0, 10 March 2006 (see
+ http://www.hgsc.bcm.tmc.edu/projects/honeybee/). The original whole
+ genome shotgun project has the project accession AADG00000000.5.
+FEATURES Location/Qualifiers
+ source 1..395045
+ /organism="Apis mellifera"
+ /mol_type="genomic DNA"
+ /strain="DH4"
+ /db_xref="taxon:7460"
+ gap 15697..16488
+ /estimated_length=792
+ gap 33780..33829
+ /estimated_length=50
+ gap 36567..36616
+ /estimated_length=50
+ STS 36914..37059
+ /db_xref="UniSTS:496079"
+ gene complement(<41314..42282)
+ /gene="LOC725278"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 34 Proteins"
+ /db_xref="GeneID:725278"
+ mRNA complement(<41314..42282)
+ /gene="LOC725278"
+ /product="similar to bab2 CG9102-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 34 Proteins"
+ /transcript_id="XM_001121146.1"
+ /db_xref="GI:110749172"
+ /db_xref="GeneID:725278"
+ CDS complement(<41314..42282)
+ /gene="LOC725278"
+ /codon_start=1
+ /product="similar to bab2 CG9102-PA"
+ /protein_id="XP_001121146.1"
+ /db_xref="GI:110749173"
+ /db_xref="GeneID:725278"
+ gap 45235..45284
+ /estimated_length=50
+ gap 80327..80816
+ /estimated_length=490
+ STS 86242..86449
+ /db_xref="UniSTS:468894"
+ gap 90521..90570
+ /estimated_length=50
+ gene 91144..95539
+ /gene="LOC408598"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 ESTs, 4 Proteins"
+ /db_xref="GeneID:408598"
+ mRNA join(91144..91461,91544..91675,92508..92534,93494..93661,
+ 93753..93824,93894..94018,94102..94226,94631..94807,
+ 94929..95272,95363..95539)
+ /gene="LOC408598"
+ /product="similar to skiff CG30021-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 ESTs, 4 Proteins"
+ /transcript_id="XM_392140.2"
+ /db_xref="GI:66499702"
+ /db_xref="GeneID:408598"
+ CDS join(91144..91461,91544..91675,92508..92534,93494..93661,
+ 93753..93824,93894..94018,94102..94226,94631..94807,
+ 94929..95272,95363..95539)
+ /gene="LOC408598"
+ /codon_start=1
+ /product="similar to skiff CG30021-PA"
+ /protein_id="XP_392140.2"
+ /db_xref="GI:66499703"
+ /db_xref="GeneID:408598"
+ gene 99353..105343
+ /gene="LOC725347"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /db_xref="GeneID:725347"
+ mRNA join(99353..99440,99825..99904,100322..100664,
+ 100900..101274,101813..102003,103301..103430,
+ 104176..104316,104396..104580,104922..105145,
+ 105220..105343)
+ /gene="LOC725347"
+ /product="similar to ankyrin repeat domain 32"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /transcript_id="XM_001121206.1"
+ /db_xref="GI:110749174"
+ /db_xref="GeneID:725347"
+ CDS join(99353..99440,99825..99904,100322..100664,
+ 100900..101274,101813..102003,103301..103430,
+ 104176..104316,104396..104580,104922..105145,
+ 105220..105343)
+ /gene="LOC725347"
+ /codon_start=1
+ /product="similar to ankyrin repeat domain 32"
+ /protein_id="XP_001121206.1"
+ /db_xref="GI:110749175"
+ /db_xref="GeneID:725347"
+ gene complement(105904..108711)
+ /gene="LOC410730"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 2 Proteins"
+ /db_xref="GeneID:410730"
+ mRNA complement(join(105904..106812,106865..106972,
+ 107049..107413,107869..108016,108226..108711))
+ /gene="LOC410730"
+ /product="similar to wurst CG9089-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 2 Proteins"
+ /transcript_id="XM_394206.3"
+ /db_xref="GI:110749176"
+ /db_xref="GeneID:410730"
+ CDS complement(join(106600..106812,106865..106972,
+ 107049..107413,107869..108016,108226..108513))
+ /gene="LOC410730"
+ /codon_start=1
+ /product="similar to wurst CG9089-PA"
+ /protein_id="XP_394206.1"
+ /db_xref="GI:48094544"
+ /db_xref="GeneID:410730"
+ gene 110434..112649
+ /gene="LOC408597"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 3 ESTs, 4 Proteins"
+ /db_xref="GeneID:408597"
+ mRNA join(110434..110685,111064..111365,111455..111997,
+ 112143..112649)
+ /gene="LOC408597"
+ /product="similar to U2 small nuclear ribonucleoprotein
+ auxiliary factor 35 kDa subunit related-protein 2
+ (U2(RNU2) small nuclear RNA auxillary factor 1-like 2)
+ (NY-REN-20 antigen)"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 3 ESTs, 4 Proteins"
+ /transcript_id="XM_392139.3"
+ /db_xref="GI:110749177"
+ /db_xref="GeneID:408597"
+ CDS join(110434..110685,111064..111365,111455..111997,
+ 112143..112407)
+ /gene="LOC408597"
+ /codon_start=1
+ /product="similar to U2 small nuclear ribonucleoprotein
+ auxiliary factor 35 kDa subunit related-protein 2
+ (U2(RNU2) small nuclear RNA auxillary factor 1-like 2)
+ (NY-REN-20 antigen)"
+ /protein_id="XP_392139.1"
+ /db_xref="GI:48094536"
+ /db_xref="GeneID:408597"
+ gene 113052..115690
+ /gene="LOC410729"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 11 Proteins"
+ /db_xref="GeneID:410729"
+ mRNA join(113052..113249,113318..113499,113681..113913,
+ 113971..114106,114174..114240,114324..114531,
+ 114606..114738,114813..114982,115054..115690)
+ /gene="LOC410729"
+ /product="similar to CG3734-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 11 Proteins"
+ /transcript_id="XM_623673.2"
+ /db_xref="GI:110749178"
+ /db_xref="GeneID:410729"
+ CDS join(113052..113249,113318..113499,113681..113913,
+ 113971..114106,114174..114240,114324..114531,
+ 114606..114738,114813..114982,115054..115163)
+ /gene="LOC410729"
+ /codon_start=1
+ /product="similar to CG3734-PA"
+ /protein_id="XP_623676.2"
+ /db_xref="GI:110749179"
+ /db_xref="GeneID:410729"
+ gene complement(115594..117288)
+ /gene="LOC408596"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 3 Proteins"
+ /db_xref="GeneID:408596"
+ mRNA complement(join(115594..115799,115897..116110,
+ 116300..116493,116737..116798,116855..117014,
+ 117111..117288))
+ /gene="LOC408596"
+ /product="similar to Cellular retinaldehyde binding
+ protein CG10546-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 3 Proteins"
+ /transcript_id="XM_392138.3"
+ /db_xref="GI:110749180"
+ /db_xref="GeneID:408596"
+ CDS complement(join(115594..115799,115897..116110,
+ 116300..116493,116737..116798,116855..117014,
+ 117111..117288))
+ /gene="LOC408596"
+ /codon_start=1
+ /product="similar to Cellular retinaldehyde binding
+ protein CG10546-PA"
+ /protein_id="XP_392138.3"
+ /db_xref="GI:110749181"
+ /db_xref="GeneID:408596"
+ gene 123566..130015
+ /gene="LOC725576"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 ESTs, 4 Proteins"
+ /db_xref="GeneID:725576"
+ mRNA join(123566..123754,124184..124294,125135..125298,
+ 125396..125547,127204..127427,127522..127655,
+ 127746..128013,128964..129236,129308..129477,
+ 129697..129806,129939..130015)
+ /gene="LOC725576"
+ /product="similar to CG12121-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 ESTs, 4 Proteins"
+ /transcript_id="XM_001121399.1"
+ /db_xref="GI:110749183"
+ /db_xref="GeneID:725576"
+ CDS join(123704..123754,124184..124294,125135..125298,
+ 125396..125547,127204..127427,127522..127655,
+ 127746..128013,128964..129236,129308..129477,
+ 129697..129806,129939..130015)
+ /gene="LOC725576"
+ /codon_start=1
+ /product="similar to CG12121-PA"
+ /protein_id="XP_001121399.1"
+ /db_xref="GI:110749184"
+ /db_xref="GeneID:725576"
+ gap 144819..160230
+ /estimated_length=15412
+ gap 177671..177720
+ /estimated_length=50
+ gene 178989..209002
+ /gene="LOC551385"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 11 Proteins"
+ /db_xref="GeneID:551385"
+ mRNA join(178989..179052,204368..204392,205334..205382,
+ 205486..205575,207877..207955,208878..209002)
+ /gene="LOC551385"
+ /product="similar to CG5907-PA, isoform A"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 11 Proteins"
+ /transcript_id="XM_623780.1"
+ /db_xref="GI:66499716"
+ /db_xref="GeneID:551385"
+ CDS join(178989..179052,204368..204392,205334..205382,
+ 205486..205575,207877..207955,208878..209002)
+ /gene="LOC551385"
+ /codon_start=1
+ /product="similar to CG5907-PA, isoform A"
+ /protein_id="XP_623783.1"
+ /db_xref="GI:66499717"
+ /db_xref="GeneID:551385"
+ gap 215643..216092
+ /estimated_length=450
+ gap 235974..236265
+ /estimated_length=292
+ gene 265623..269178
+ /gene="LOC725683"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 4 Proteins"
+ /db_xref="GeneID:725683"
+ mRNA join(265623..265731,266759..267418,267523..267680,
+ 267766..267924,268017..268286,268445..268696,
+ 268834..269178)
+ /gene="LOC725683"
+ /product="similar to dusky CG9355-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 4 Proteins"
+ /transcript_id="XM_001121500.1"
+ /db_xref="GI:110749185"
+ /db_xref="GeneID:725683"
+ CDS join(265623..265731,266759..267418,267523..267680,
+ 267766..267924,268017..268286,268445..268696,
+ 268834..269178)
+ /gene="LOC725683"
+ /codon_start=1
+ /product="similar to dusky CG9355-PA"
+ /protein_id="XP_001121500.1"
+ /db_xref="GI:110749186"
+ /db_xref="GeneID:725683"
+ gene complement(269821..270523)
+ /gene="LOC725721"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 4 Proteins"
+ /pseudo
+ /db_xref="GeneID:725721"
+ mRNA complement(join(269821..270470,270502..270523))
+ /gene="LOC725721"
+ /product="similar to CG9960-PA, isoform A"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 4 Proteins"
+ /pseudo
+ /transcript_id="XR_014924.1"
+ /db_xref="GI:110749187"
+ /db_xref="GeneID:725721"
+ gene complement(270647..271288)
+ /gene="LOC725747"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 Proteins"
+ /db_xref="GeneID:725747"
+ mRNA complement(join(270647..270971,271137..271288))
+ /gene="LOC725747"
+ /product="similar to CG9958-PA, isoform A"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 Proteins"
+ /transcript_id="XM_001121556.1"
+ /db_xref="GI:110749188"
+ /db_xref="GeneID:725747"
+ CDS complement(join(270647..270971,271137..271288))
+ /gene="LOC725747"
+ /codon_start=1
+ /product="similar to CG9958-PA, isoform A"
+ /protein_id="XP_001121556.1"
+ /db_xref="GI:110749189"
+ /db_xref="GeneID:725747"
+ gap 277350..277399
+ /estimated_length=50
+ gap 286941..287823
+ /estimated_length=883
+ gene complement(293586..351751)
+ /gene="LOC725776"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 Proteins"
+ /db_xref="GeneID:725776"
+ mRNA complement(join(293586..293779,300730..300789,
+ 301316..301734,301813..302291,351275..351751))
+ /gene="LOC725776"
+ /product="similar to CG12029-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 2 Proteins"
+ /transcript_id="XM_001121583.1"
+ /db_xref="GI:110749190"
+ /db_xref="GeneID:725776"
+ CDS complement(join(293586..293779,300730..300789,
+ 301316..301734,301813..302291,351275..351751))
+ /gene="LOC725776"
+ /codon_start=1
+ /product="similar to CG12029-PA"
+ /protein_id="XP_001121583.1"
+ /db_xref="GI:110749191"
+ /db_xref="GeneID:725776"
+ gap 310814..311565
+ /estimated_length=752
+ gap 312805..312854
+ /estimated_length=50
+ gene complement(357891..360635)
+ /gene="LOC551557"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /db_xref="GeneID:551557"
+ mRNA complement(join(357891..357970,359209..359315,
+ 359494..359602,359709..359839,359917..360182,
+ 360337..360635))
+ /gene="LOC551557"
+ /product="hypothetical LOC551557"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /transcript_id="XM_625253.2"
+ /db_xref="GI:110749192"
+ /db_xref="GeneID:551557"
+ CDS complement(join(357891..357970,359209..359315,
+ 359494..359602,359709..359839,359917..360182,
+ 360337..360384))
+ /gene="LOC551557"
+ /codon_start=1
+ /product="hypothetical protein"
+ /protein_id="XP_625256.1"
+ /db_xref="GI:66499720"
+ /db_xref="GeneID:551557"
+ gap 360694..361646
+ /estimated_length=953
+ gene 367137..370759
+ /gene="LOC413713"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 6 Proteins"
+ /db_xref="GeneID:413713"
+ mRNA join(367137..367451,368003..368231,368363..368705,
+ 368774..368893,369011..369200,369592..369666,
+ 369741..369798,369879..370006,370110..370313,
+ 370421..370759)
+ /gene="LOC413713"
+ /product="similar to CG31915-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 6 Proteins"
+ /transcript_id="XM_397154.3"
+ /db_xref="GI:110749193"
+ /db_xref="GeneID:413713"
+ CDS join(367309..367451,368003..368231,368363..368705,
+ 368774..368893,369011..369200,369592..369666,
+ 369741..369798,369879..370006,370110..370313,
+ 370421..370589)
+ /gene="LOC413713"
+ /codon_start=1
+ /product="similar to CG31915-PA"
+ /protein_id="XP_397154.2"
+ /db_xref="GI:66499725"
+ /db_xref="GeneID:413713"
+ gene 372041..374176
+ /gene="LOC410115"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 19 Proteins"
+ /db_xref="GeneID:410115"
+ mRNA join(372041..372257,372600..372780,372868..373182,
+ 373312..373626,373711..373929,373997..374176)
+ /gene="LOC410115"
+ /product="similar to Actin-related protein 66B CG7558-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 19 Proteins"
+ /transcript_id="XM_393599.3"
+ /db_xref="GI:110749194"
+ /db_xref="GeneID:410115"
+ CDS join(372214..372257,372600..372780,372868..373182,
+ 373312..373626,373711..373929,373997..374176)
+ /gene="LOC410115"
+ /codon_start=1
+ /product="similar to Actin-related protein 66B CG7558-PA"
+ /protein_id="XP_393599.2"
+ /db_xref="GI:66499731"
+ /db_xref="GeneID:410115"
+ gene complement(375339..378048)
+ /gene="LOC410116"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 3 Proteins"
+ /db_xref="GeneID:410116"
+ mRNA complement(join(375339..375363,375481..375671,
+ 375749..376042,376136..376445,376506..376759,
+ 376895..377123,377210..377568,377725..377879,
+ 377982..378048))
+ /gene="LOC410116"
+ /product="similar to Protein FKSG26, transcript variant 1"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 3 Proteins"
+ /transcript_id="XM_393600.3"
+ /db_xref="GI:110749195"
+ /db_xref="GeneID:410116"
+ CDS complement(join(375339..375363,375481..375671,
+ 375749..376042,376136..376445,376506..376759,
+ 376895..377123,377210..377568,377725..377856))
+ /gene="LOC410116"
+ /codon_start=1
+ /product="similar to Protein FKSG26 isoform 1"
+ /protein_id="XP_393600.2"
+ /db_xref="GI:66499735"
+ /db_xref="GeneID:410116"
+ gene 378362..380121
+ /gene="LOC726037"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 2 Proteins"
+ /db_xref="GeneID:726037"
+ mRNA join(378362..378559,378800..378859,378935..379057,
+ 379136..379352,379430..379468,379559..379627,
+ 379691..379890,380107..380121)
+ /gene="LOC726037"
+ /product="similar to CG3408-PA"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST, 2 Proteins"
+ /transcript_id="XM_001121810.1"
+ /db_xref="GI:110749196"
+ /db_xref="GeneID:726037"
+ CDS join(378470..378559,378800..378859,378935..379057,
+ 379136..379352,379430..379468,379559..379627,
+ 379691..379890,380107..380121)
+ /gene="LOC726037"
+ /codon_start=1
+ /product="similar to CG3408-PA"
+ /protein_id="XP_001121810.1"
+ /db_xref="GI:110749197"
+ /db_xref="GeneID:726037"
+ gene complement(380225..381671)
+ /gene="LOC551729"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /db_xref="GeneID:551729"
+ mRNA complement(join(380225..380237,380363..380868,
+ 380999..381397,381554..381671))
+ /gene="LOC551729"
+ /product="hypothetical LOC551729"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 1 EST"
+ /transcript_id="XM_624117.2"
+ /db_xref="GI:110749198"
+ /db_xref="GeneID:551729"
+ CDS complement(join(380225..380237,380363..380868,
+ 380999..381397,381554..381616))
+ /gene="LOC551729"
+ /codon_start=1
+ /product="hypothetical protein"
+ /protein_id="XP_624120.2"
+ /db_xref="GI:110749199"
+ /db_xref="GeneID:551729"
+ gap 381672..381725
+ /estimated_length=54
+ gene complement(384791..393499)
+ /gene="LOC413907"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 9 Proteins"
+ /db_xref="GeneID:413907"
+ mRNA complement(join(384791..384940,385031..385276,
+ 385388..385636,386181..386399,386511..386684,
+ 387874..388075,388580..388710,389129..389248,
+ 389446..389715,389790..389882,389975..390365,
+ 391150..391397,391484..391614,391950..392010,
+ 392243..392415,392491..392798,393312..393499))
+ /gene="LOC413907"
+ /product="similar to Pyruvate dehydrogenase E1 component
+ alpha subunit, testis-specific form, mitochondrial
+ precursor (PDHE1-A type II)"
+ /note="Derived by automated computational analysis using
+ gene prediction method: GNOMON. Supporting evidence
+ includes similarity to: 9 Proteins"
+ /transcript_id="XM_397346.3"
+ /db_xref="GI:110749200"
+ /db_xref="GeneID:413907"
+ CDS complement(join(384791..384940,385031..385276,
+ 385388..385636,386181..386399,386511..386684,
+ 387874..388075,388580..388710,389129..389248,
+ 389446..389715,389790..389882,389975..390365,
+ 391150..391397,391484..391614,391950..392010,
+ 392243..392415,392491..392798,393312..393499))
+ /gene="LOC413907"
+ /codon_start=1
+ /product="similar to Pyruvate dehydrogenase E1 component
+ alpha subunit, testis-specific form, mitochondrial
+ precursor (PDHE1-A type II)"
+ /protein_id="XP_397346.3"
+ /db_xref="GI:110749201"
+ /db_xref="GeneID:413907"
+ STS 389225..389486
+ /db_xref="UniSTS:496080"
+ gap 393500..393670
+ /estimated_length=171
diff --git a/inst/sequences/bb.acc b/inst/sequences/bb.acc
new file mode 100644
index 0000000..099be13
--- /dev/null
+++ b/inst/sequences/bb.acc
@@ -0,0 +1,20 @@
+AY382159
+AY382160
+AY491412
+AY498719
+AY498720
+AY498721
+AY498722
+AY498723
+AY498724
+AY498725
+AY498726
+AY498727
+AY498728
+AY498729
+AY499181
+AY500379
+AY500380
+AY500381
+AY500382
+AY500383
diff --git a/inst/sequences/bb.kwd b/inst/sequences/bb.kwd
new file mode 100644
index 0000000..4ae6085
--- /dev/null
+++ b/inst/sequences/bb.kwd
@@ -0,0 +1,20 @@
+PLASMID
+CIRCULAR
+PARTIAL
+5'-PARTIAL
+3'-PARTIAL
+MOTA GENE
+MOTB GENE
+DIVISION PRO
+GYRB GENE
+JOINING REGION
+FTSA GENE
+RPOB GENE
+RPOC GENE
+FLA GENE
+DNAJ GENE
+TUF GENE
+PGK GENE
+RUVA GENE
+RUVB GENE
+PROMOTER REGION
diff --git a/inst/sequences/bb.mne b/inst/sequences/bb.mne
new file mode 100644
index 0000000..224594a
--- /dev/null
+++ b/inst/sequences/bb.mne
@@ -0,0 +1,20 @@
+A04009.OSPA
+A04009.OSPB
+A22442
+A24006
+A24008
+A24010
+A24012
+A24014
+A24016
+A33362
+A67759.PE1
+AB011063
+AB011064
+AB011065
+AB011066
+AB011067
+AB035616
+AB035617
+AB035618
+AB041949.VLSE
diff --git a/inst/sequences/bb.sp b/inst/sequences/bb.sp
new file mode 100644
index 0000000..f605ad7
--- /dev/null
+++ b/inst/sequences/bb.sp
@@ -0,0 +1,20 @@
+BORRELIA ANSERINA
+BORRELIA CORIACEAE
+BORRELIA PARKERI
+BORRELIA TURICATAE
+BORRELIA HERMSII
+BORRELIA CROCIDURAE
+BORRELIA LONESTARI
+BORRELIA HISPANICA
+BORRELIA BARBOURI
+BORRELIA THEILERI
+BORRELIA DUTTONII
+BORRELIA MIYAMOTOI
+BORRELIA PERSICA
+BORRELIA RECURRENTIS
+BORRELIA BURGDORFERI
+BORRELIA AFZELII
+BORRELIA GARINII
+BORRELIA ANDERSONII
+BORRELIA VALAISIANA
+BORRELIA JAPONICA
diff --git a/inst/sequences/bordetella.fasta b/inst/sequences/bordetella.fasta
new file mode 100644
index 0000000..272bef0
--- /dev/null
+++ b/inst/sequences/bordetella.fasta
@@ -0,0 +1,38 @@
+>BP0002
+atgagcgccgttcccgatatccccggcggccccgcgcagcggctggcccaggcctgcgat
+gcgctgcgattgccggccgacgccggccagcagcagaagctgctgcgctatatcgagcaa
+atgcagcgctggaaccgcacgtacaacctgactgccatccgggatccggggcagatgctc
+gtgcagcacctgttcgacagtctgtcggtcgtggcgccgctggagcgtggcctgcccgcc
+gccgggtccggcgcgcgcgtcaagctgttcgacgtcggctccggcggcggcctgcccggc
+gtggtgctggccatcatgcgcgcccattgggacgtcacatgcgtggacgcagtcgagaag
+aaaaccgcattcgtgcgacagatggccggcgcgctcggactgcccaatctgcaggccgcg
+catacccgtatcgaacagctcgaaccggcgcaatgcgacgtggtgatatcgcgtgcgttc
+gcttcgttacaggacttcgcgaagctggccggccgccacgtgcgcgagggtggtaccctc
+gtcgccatgaagggcaaggtgcccgatgacgaaatccaggcgttacagcaacacggccac
+tggacggtcgaacggatcgaaccgttggtggtgccggcactcgacgcgcaacgctgcctg
+atatggatgcgacgcagtcaaggaaacata
+>BPP0002
+atgagcgccgttcccgatatccccggcggccccgcgcagcggctggcccaggcctgcgat
+gcgctgcgattgccggccgacgccggccagcagcagaagctgctgcgctatatcgagcaa
+atgcagcgctggaaccgcacgtacaacctgactgccatccgggacccggggcagatgctc
+gtgcagcacctgttcgacagtctgtcggtcgtggcgccgctggagcgtggcctgcccgcc
+gccgggtccggcgcgcgcgtcaagctgttcgacgtcggctccggcggcggcctgcccggc
+gtggtgctggccatcatgcgcgcccattgggacgtcacatgcgtggacgcagtcgagaag
+aaaaccgcattcgtgcggcagatggccggcgcgctcggactgcccaatctgcaggccgcg
+catacccgtatcgaacagctcgaaccggcgcaatgcgacgtggtgatatcgcgtgcgttc
+gcttcgttacaggacttcgcgaagctggccggccgccacgtgcgcgagggtggtaccctc
+gtcgccatgaagggcaaggtgcccgatgacgaaatccaggcgttacagcaacacggccac
+tggacggtcgaacggatcgaaccgttggtggtgccggcactcgacgcgcaacgctgcctg
+atatggatgcgacgcagtcaaggaaacata
+>BB0002
+atgagcgccgttcccgatatccccggcggccccgcgcagcggctggcccaggcctgcgat
+gcgctgcgattgccggccgacgccggccagcagcagaagctgctgcgctatatcgagcaa
+atgcagcgctggaaccgcacgtacaacctgactgccatccgggacccggggcagatgctc
+gtgcagcacctgttcgacagtctgtcggtcgtggcgccgctggagcgcggcctgcccggc
+gtggtgctggccatcatgcgcgcccattgggacgtcacatgcgtggacgcagtcgagaag
+aaaaccgcattcgtgcggcagatggccggcgcgctcggactgcccaatctgcaggccgcg
+catacccgtatcgaacagctcgaaccggcgcaatgcgacgtggtgatatcgcgtgcgttc
+gcttcgttacaggacttcgcgaagctggccggccgccacgtgcgcgagggtggtaccctc
+gtcgccatgaagggcaaggtgcccgatgacgaaatccaggcgttacagcaacacggccac
+tggacggtcgaacggatcgaaccgttggtggtgccggcactcgacgcgcaacgctgcctg
+atatggatgcgacgcagtcaaggaaacata
diff --git a/inst/sequences/bordetella.pep.aln b/inst/sequences/bordetella.pep.aln
new file mode 100644
index 0000000..4e5770a
--- /dev/null
+++ b/inst/sequences/bordetella.pep.aln
@@ -0,0 +1,22 @@
+CLUSTAL W (1.81) multiple sequence alignment
+
+
+BP0002 MSAVPDIPGGPAQRLAQACDALRLPADAGQQQKLLRYIEQMQRWNRTYNLTAIRDPGQML
+BB0002 MSAVPDIPGGPAQRLAQACDALRLPADAGQQQKLLRYIEQMQRWNRTYNLTAIRDPGQML
+BPP0002 MSAVPDIPGGPAQRLAQACDALRLPADAGQQQKLLRYIEQMQRWNRTYNLTAIRDPGQML
+ ************************************************************
+
+BP0002 VQHLFDSLSVVAPLERGLPAAGSGARVKLFDVGSGGGLPGVVLAIMRAHWDVTCVDAVEK
+BB0002 VQHLFDSLSVVAPLER--------------------GLPGVVLAIMRAHWDVTCVDAVEK
+BPP0002 VQHLFDSLSVVAPLERGLPAAGSGARVKLFDVGSGGGLPGVVLAIMRAHWDVTCVDAVEK
+ ****************. .::.:.: . . .:..************************
+
+BP0002 KTAFVRQMAGALGLPNLQAAHTRIEQLEPAQCDVVISRAFASLQDFAKLAGRHVREGGTL
+BB0002 KTAFVRQMAGALGLPNLQAAHTRIEQLEPAQCDVVISRAFASLQDFAKLAGRHVREGGTL
+BPP0002 KTAFVRQMAGALGLPNLQAAHTRIEQLEPAQCDVVISRAFASLQDFAKLAGRHVREGGTL
+ ************************************************************
+
+BP0002 VAMKGKVPDDEIQALQQHGHWTVERIEPLVVPALDAQRCLIWMRRSQGNI
+BB0002 VAMKGKVPDDEIQALQQHGHWTVERIEPLVVPALDAQRCLIWMRRSQGNI
+BPP0002 VAMKGKVPDDEIQALQQHGHWTVERIEPLVVPALDAQRCLIWMRRSQGNI
+ **************************************************
diff --git a/inst/sequences/ct.bfa b/inst/sequences/ct.bfa
new file mode 100644
index 0000000..9d91241
Binary files /dev/null and b/inst/sequences/ct.bfa differ
diff --git a/inst/sequences/ct.fasta.gz b/inst/sequences/ct.fasta.gz
new file mode 100644
index 0000000..08f93e2
Binary files /dev/null and b/inst/sequences/ct.fasta.gz differ
diff --git a/inst/sequences/ct.gbk.gz b/inst/sequences/ct.gbk.gz
new file mode 100644
index 0000000..75bacb2
Binary files /dev/null and b/inst/sequences/ct.gbk.gz differ
diff --git a/inst/sequences/ct.predict b/inst/sequences/ct.predict
new file mode 100644
index 0000000..ebb2da4
--- /dev/null
+++ b/inst/sequences/ct.predict
@@ -0,0 +1,948 @@
+>CHLTCG 1042519 residues
+orf00001 1041920 1176 +1 5.42
+orf00002 1593 1321 -1 2.06
+orf00004 1794 2096 +3 6.12
+orf00005 2108 3583 +2 8.73
+orf00006 3729 5051 +3 8.95
+orf00009 6241 5150 -2 7.25
+orf00010 6938 6369 -3 6.69
+orf00011 7093 7233 +1 1.77
+orf00012 7251 8201 +3 7.10
+orf00013 9119 8217 -3 7.66
+orf00014 9096 9212 +3 0.42
+orf00015 9373 9804 +1 9.58
+orf00016 11158 9791 -2 6.77
+orf00017 12546 11290 -1 6.57
+orf00018 13235 12543 -3 6.44
+orf00021 13612 14952 +1 5.65
+orf00022 14964 16025 +3 5.35
+orf00023 17427 16123 -1 5.59
+orf00024 17636 18364 +2 8.20
+orf00025 18418 19851 +1 8.71
+orf00026 20386 19913 -2 5.57
+orf00027 20372 20530 +2 0.23
+orf00028 21179 21045 -3 1.11
+orf00029 21432 24542 +3 7.64
+orf00030 26472 24601 -1 7.24
+orf00033 27416 26673 -3 7.07
+orf00034 27492 27818 +3 4.33
+orf00036 28006 29085 +1 8.36
+orf00037 29069 29941 +2 6.28
+orf00038 29938 31284 +1 6.61
+orf00039 31275 31625 +3 6.73
+orf00040 31641 32699 +3 6.46
+orf00041 32725 33090 +1 5.06
+orf00042 33154 33807 +1 7.95
+orf00043 33798 34415 +3 7.40
+orf00044 34408 34710 +1 4.24
+orf00045 34710 36362 +3 7.74
+orf00047 38742 36502 -1 6.72
+orf00048 40012 38990 -2 7.04
+orf00050 40354 41127 +1 6.34
+orf00051 42303 41092 -1 4.19
+orf00052 42447 42560 +3 1.17
+orf00054 43173 42865 -1 4.53
+orf00056 43803 43231 -1 7.78
+orf00057 43891 44043 +1 2.64
+orf00058 44085 45089 +3 3.44
+orf00059 45043 45903 +1 5.55
+orf00060 47965 45965 -2 5.67
+orf00061 48586 48083 -2 7.00
+orf00063 49052 49525 +2 9.45
+orf00064 49866 51365 +3 8.96
+orf00065 51940 51449 -2 10.14
+orf00066 51963 52115 +3 5.24
+orf00067 52270 53214 +1 7.88
+orf00068 53303 54022 +2 7.17
+orf00069 54113 55585 +2 7.92
+orf00071 57254 55644 -3 4.00
+orf00074 58920 57358 -1 3.13
+orf00075 60110 58974 -3 8.37
+orf00076 60546 60100 -1 6.20
+orf00077 60736 60599 -2 2.19
+orf00078 60878 63595 +2 7.01
+orf00079 63599 64696 +2 7.29
+orf00081 65456 64725 -3 7.50
+orf00083 67171 65465 -2 5.03
+orf00087 68528 67425 -3 2.95
+orf00088 68883 68608 -1 4.64
+orf00089 69282 69034 -1 2.79
+orf00090 69290 70882 +2 5.82
+orf00091 70990 71751 +1 8.94
+orf00092 71910 73148 +3 8.74
+orf00093 73158 74600 +3 9.71
+orf00094 76469 74661 -3 7.36
+orf00095 78241 76655 -2 6.96
+orf00097 79068 78592 -1 7.31
+orf00098 79885 80706 +1 5.58
+orf00099 80804 81478 +2 5.60
+orf00100 81545 82834 +2 6.08
+orf00101 82824 83780 +3 7.15
+orf00103 83807 84946 +2 6.44
+orf00104 85142 87001 +2 5.14
+orf00106 89180 88083 -3 6.78
+orf00107 90280 89180 -2 6.18
+orf00108 90560 91015 +2 7.73
+orf00109 91847 90993 -3 4.46
+orf00110 92776 91913 -2 3.05
+orf00111 93337 92894 -2 4.37
+orf00112 93614 93907 +2 2.56
+orf00113 94270 95952 +1 6.82
+orf00115 95949 96431 +3 7.68
+orf00116 97530 96445 -1 6.05
+orf00118 99439 97700 -2 8.86
+orf00119 99834 99565 -1 6.27
+orf00120 101566 99983 -2 5.30
+orf00121 102010 101570 -2 6.77
+orf00122 103313 102048 -3 7.23
+orf00123 105361 103331 -2 8.16
+orf00124 106539 105457 -1 7.68
+orf00125 106706 106554 -3 0.25
+orf00126 106796 107896 +2 9.21
+orf00127 108801 107905 -1 4.96
+orf00128 109492 108767 -2 4.03
+orf00129 109901 109530 -3 5.96
+orf00131 112553 109908 -3 5.52
+orf00132 113847 112543 -1 5.54
+orf00134 115674 113965 -1 8.20
+orf00135 116036 116974 +2 6.56
+orf00136 116980 117339 +1 3.60
+orf00137 117801 117340 -1 3.84
+orf00138 117936 118397 +3 8.17
+orf00139 118432 119328 +1 7.65
+orf00140 120214 119318 -2 8.26
+orf00142 120452 122422 +2 6.42
+orf00144 123446 122535 -3 4.99
+orf00145 123493 124602 +1 7.34
+orf00146 124652 125407 +2 6.32
+orf00147 126208 125420 -2 3.68
+orf00148 127970 126336 -3 11.73
+orf00149 128316 128008 -1 5.93
+orf00150 130314 128488 -1 7.32
+orf00151 130438 130325 -2 1.51
+orf00152 130461 130583 +3 3.52
+orf00154 130617 133220 +3 7.52
+orf00155 133306 134706 +1 5.34
+orf00156 134936 135361 +2 6.33
+orf00157 135513 135842 +3 5.98
+orf00158 135871 136161 +1 4.46
+orf00159 136283 136158 -3 1.19
+orf00161 136287 136751 +3 5.63
+orf00162 137744 136917 -3 8.27
+orf00165 138142 138843 +1 9.17
+orf00166 138860 139387 +2 8.30
+orf00167 139400 139894 +2 8.56
+orf00168 139898 141271 +2 7.62
+orf00169 141494 141946 +2 6.89
+orf00170 141972 142361 +3 8.09
+orf00172 143260 142409 -2 7.63
+orf00173 144093 143356 -1 5.70
+orf00174 144300 144944 +3 5.45
+orf00175 144941 145642 +2 7.41
+orf00176 149061 145645 -1 7.84
+orf00177 150335 149058 -3 5.95
+orf00178 151216 150413 -2 6.68
+orf00179 151671 152084 +3 3.94
+orf00180 152209 153225 +1 8.41
+orf00181 153276 154034 +3 9.66
+orf00182 154053 154898 +3 6.97
+orf00183 154882 155823 +1 8.28
+orf00184 157100 155820 -3 7.83
+orf00186 157277 157963 +2 6.38
+orf00188 158123 158593 +2 8.10
+orf00189 158835 158716 -1 0.74
+orf00190 158993 159850 +2 5.15
+orf00191 159852 160694 +3 3.57
+orf00193 160694 161551 +2 6.48
+orf00195 161737 163581 +1 7.63
+orf00196 163592 165583 +2 8.67
+orf00197 165681 170030 +3 8.06
+orf00198 171616 170093 -2 5.69
+orf00199 171616 171750 +1 1.78
+orf00200 172763 171816 -3 6.44
+orf00201 173356 174867 +1 7.80
+orf00202 174874 175551 +1 7.06
+orf00203 178167 175702 -1 5.92
+orf00204 179348 178320 -3 4.14
+orf00205 180381 179440 -1 4.95
+orf00206 181952 180738 -3 5.55
+orf00208 182717 182076 -3 2.81
+orf00209 183667 182735 -2 4.12
+orf00211 184316 183813 -3 6.75
+orf00212 185053 184313 -2 6.07
+orf00214 185389 185201 -2 4.18
+orf00216 185673 187319 +3 7.02
+orf00217 187372 187632 +1 1.01
+orf00219 188106 188552 +3 4.50
+orf00221 188549 190468 +2 8.20
+orf00222 190524 191855 +3 4.67
+orf00224 191921 192223 +2 4.36
+orf00225 192279 192563 +3 8.29
+orf00226 192670 192783 +1 2.28
+orf00227 192912 194090 +3 5.14
+orf00228 194083 194844 +1 4.45
+orf00229 195582 195091 -1 3.67
+orf00230 195780 195610 -1 3.61
+orf00231 196120 195848 -2 3.22
+orf00232 196662 196207 -1 7.71
+orf00233 196857 198446 +3 7.14
+orf00235 198880 198473 -2 4.14
+orf00236 199716 198877 -1 4.75
+orf00237 199739 200953 +2 6.54
+orf00238 201614 200955 -3 7.20
+orf00239 202303 201611 -2 8.12
+orf00241 203321 202611 -3 7.31
+orf00243 203689 204453 +1 5.18
+orf00245 204429 206048 +3 7.00
+orf00246 206035 206481 +1 11.16
+orf00247 206598 208121 +3 6.34
+orf00248 208146 208916 +3 8.27
+orf00249 209785 208913 -2 4.50
+orf00250 210410 209799 -3 7.92
+orf00251 212922 210412 -1 7.72
+orf00252 215351 212937 -3 7.56
+orf00253 215518 215354 -2 1.85
+orf00255 216624 215851 -1 7.62
+orf00256 217769 216651 -3 6.23
+orf00258 219368 217896 -3 4.42
+orf00259 220838 219747 -3 5.08
+orf00260 221062 221382 +1 4.35
+orf00263 221449 222474 +1 6.26
+orf00264 222453 223994 +3 5.33
+orf00265 224189 225094 +2 4.52
+orf00266 225177 225971 +3 5.32
+orf00267 225964 226797 +1 6.46
+orf00268 226812 227555 +3 7.46
+orf00270 227868 228617 +3 10.24
+orf00271 228647 230062 +2 6.90
+orf00272 230064 231743 +3 3.56
+orf00273 231752 232600 +2 6.48
+orf00274 232675 234228 +1 6.47
+orf00275 235769 234474 -3 7.57
+orf00276 238225 235766 -2 7.01
+orf00277 238422 239690 +3 7.51
+orf00278 240251 239682 -3 7.62
+orf00279 240717 240271 -1 4.45
+orf00280 241435 240707 -2 7.55
+orf00282 243173 241530 -3 8.66
+orf00283 243477 244523 +3 6.77
+orf00284 244537 245937 +1 6.21
+orf00285 245944 246795 +1 7.80
+orf00286 246926 247777 +2 8.02
+orf00288 247889 248797 +2 5.48
+orf00289 248794 249372 +1 3.82
+orf00290 250265 249369 -3 6.29
+orf00291 250685 250554 -3 5.96
+orf00292 251110 250721 -2 5.38
+orf00293 252064 251252 -2 7.02
+orf00295 252859 252455 -2 8.32
+orf00296 253357 252989 -2 6.52
+orf00297 253986 253456 -1 6.84
+orf00299 254503 254102 -2 4.82
+orf00301 255364 254774 -2 7.73
+orf00303 256161 255514 -1 6.75
+orf00304 256387 257634 +1 5.12
+orf00306 257818 259290 +1 5.79
+orf00307 259591 259460 -2 3.01
+orf00310 259819 260355 +1 5.35
+orf00311 260413 263199 +1 5.67
+orf00313 263228 263641 +2 4.55
+orf00314 263935 263702 -2 6.42
+orf00315 265050 264304 -1 5.89
+orf00316 265973 265047 -3 8.30
+orf00317 266991 265990 -1 8.27
+orf00318 267111 267713 +3 8.68
+orf00319 267793 267906 +1 2.11
+orf00320 268088 270466 +2 6.00
+orf00322 270423 270557 +3 0.59
+orf00324 270562 271056 +1 8.45
+orf00325 271084 272148 +1 5.97
+orf00326 273227 272145 -3 6.89
+orf00327 273563 274585 +2 10.11
+orf00328 274578 275564 +3 9.02
+orf00330 275569 276858 +1 8.09
+orf00331 279329 276885 -3 6.53
+orf00332 279485 279835 +2 3.97
+orf00333 281259 279889 -1 7.35
+orf00334 283693 281336 -2 7.68
+orf00335 284827 284009 -2 6.35
+orf00336 285078 285725 +3 5.83
+orf00337 285855 286499 +3 5.15
+orf00338 287090 286707 -3 9.36
+orf00339 287279 288523 +2 5.53
+orf00340 288513 289727 +3 5.86
+orf00341 290855 289731 -3 7.14
+orf00342 291625 290861 -2 4.80
+orf00345 291930 292421 +3 5.18
+orf00346 292430 293128 +2 4.96
+orf00348 293125 293895 +1 7.31
+orf00349 293888 294478 +2 5.07
+orf00350 296439 294499 -1 3.90
+orf00352 297379 296405 -2 6.32
+orf00353 298693 297512 -2 7.02
+orf00354 299113 298811 -2 6.13
+orf00355 300052 299333 -2 2.83
+orf00356 301478 300027 -3 5.45
+orf00357 303744 301801 -1 7.17
+orf00358 303973 303731 -2 4.52
+orf00359 304920 304018 -1 7.17
+orf00360 305198 305764 +2 5.09
+orf00361 305771 306190 +2 7.67
+orf00362 306433 307800 +1 6.77
+orf00363 307827 308423 +3 7.65
+orf00365 309105 308395 -1 2.67
+orf00367 309116 310567 +2 4.25
+orf00368 310571 311521 +2 7.02
+orf00369 311511 312152 +3 5.56
+orf00371 312158 312892 +2 5.35
+orf00372 313269 312916 -1 7.55
+orf00373 315361 313289 -2 7.59
+orf00374 315342 315482 +3 0.95
+orf00375 316980 315556 -1 5.02
+orf00376 317705 316986 -3 6.78
+orf00378 317970 320534 +3 10.23
+orf00379 321591 320515 -1 5.99
+orf00380 321823 323514 +1 5.89
+orf00382 324740 323601 -3 8.60
+orf00383 325475 324798 -3 7.00
+orf00384 325954 325478 -2 7.46
+orf00385 326393 325956 -3 7.02
+orf00387 327356 326430 -3 7.17
+orf00388 328054 327428 -2 7.04
+orf00389 329961 328180 -1 6.55
+orf00391 330580 330113 -2 4.41
+orf00392 330689 331384 +2 7.44
+orf00393 331501 331361 -2 0.07
+orf00394 331548 332732 +3 5.17
+orf00395 332710 333435 +1 5.95
+orf00396 336990 334228 -1 6.78
+orf00398 339866 337047 -3 7.86
+orf00399 340448 339993 -3 3.44
+orf00400 340836 340429 -1 4.66
+orf00401 342866 340917 -3 7.18
+orf00402 343414 342872 -2 7.84
+orf00403 344733 343468 -1 8.25
+orf00404 346562 344787 -3 8.78
+orf00405 347356 346556 -2 6.49
+orf00407 347529 347374 -1 1.46
+orf00408 348149 347523 -3 11.33
+orf00409 348258 348968 +3 9.09
+orf00411 349368 348976 -1 2.89
+orf00412 350375 349392 -3 8.70
+orf00413 354677 350487 -3 7.74
+orf00414 358460 354702 -3 8.29
+orf00415 359213 358821 -3 10.24
+orf00416 359763 359245 -1 6.74
+orf00418 360483 359785 -1 4.14
+orf00419 360931 360506 -2 8.16
+orf00420 361585 361037 -2 8.39
+orf00421 361837 361589 -2 4.26
+orf00422 363164 361980 -3 4.31
+orf00423 363733 363512 -2 2.42
+orf00424 363807 363929 +3 0.33
+orf00425 364145 365056 +2 5.21
+orf00426 365053 365499 +1 7.34
+orf00427 367242 365551 -1 3.57
+orf00429 367789 367607 -2 5.36
+orf00430 369135 368509 -1 7.04
+orf00431 369431 370156 +2 6.13
+orf00433 370170 371720 +3 3.17
+orf00434 371704 371922 +1 5.73
+orf00435 372022 372201 +1 5.51
+orf00437 372198 374120 +3 6.60
+orf00438 374217 375674 +3 7.29
+orf00440 375697 381057 +1 7.47
+orf00442 381350 382675 +2 6.79
+orf00443 382668 382958 +3 6.37
+orf00444 384683 382968 -3 8.11
+orf00445 385063 384683 -2 4.17
+orf00446 385616 385149 -3 7.58
+orf00447 385867 385667 -2 6.49
+orf00450 385894 387423 +1 4.57
+orf00452 389448 387496 -1 8.42
+orf00453 390745 389567 -2 8.66
+orf00454 390950 390774 -3 4.44
+orf00455 391734 391147 -1 9.52
+orf00456 392039 391923 -3 0.79
+orf00457 392089 394548 +1 7.17
+orf00458 394650 395015 +3 1.79
+orf00460 395365 396279 +1 5.85
+orf00461 396341 397288 +2 5.86
+orf00462 397342 398928 +1 8.48
+orf00463 398964 399554 +3 6.87
+orf00464 399536 401236 +2 7.64
+orf00465 401243 403336 +2 5.16
+orf00467 403718 403410 -3 8.03
+orf00468 404419 403874 -2 6.65
+orf00469 405527 404694 -3 6.04
+orf00470 406712 405543 -3 7.11
+orf00472 409023 406909 -1 7.88
+orf00474 409595 409341 -3 3.02
+orf00475 409594 409812 +1 2.32
+orf00478 409838 410374 +2 5.43
+orf00479 411024 410434 -1 4.75
+orf00483 411803 411276 -3 9.53
+orf00484 412924 412064 -2 6.84
+orf00485 414229 412934 -2 7.37
+orf00487 415220 414222 -3 7.79
+orf00488 415997 415236 -3 8.35
+orf00489 417901 416174 -2 9.18
+orf00491 418071 419393 +3 3.42
+orf00492 419626 419889 +1 2.17
+orf00493 419882 420955 +2 8.03
+orf00494 420952 422073 +1 5.33
+orf00496 422054 423490 +2 4.94
+orf00498 423532 424317 +1 6.02
+orf00499 424387 425787 +1 4.02
+orf00500 425856 426443 +3 6.51
+orf00502 426459 427910 +3 5.63
+orf00503 428082 429140 +3 6.28
+orf00504 430195 429182 -2 8.19
+orf00506 432185 430608 -3 8.71
+orf00508 433641 432298 -1 7.59
+orf00509 434454 433654 -1 6.28
+orf00510 435256 434483 -2 8.35
+orf00511 436086 435325 -1 8.19
+orf00512 436447 436298 -2 1.33
+orf00513 436777 437403 +1 4.25
+orf00514 439033 437414 -2 8.88
+orf00515 439383 439048 -1 6.23
+orf00516 440249 439380 -3 7.88
+orf00517 440536 442611 +1 7.02
+orf00518 442972 442625 -2 2.29
+orf00520 443154 444380 +3 5.86
+orf00521 444488 445672 +2 7.36
+orf00523 445680 446687 +3 5.96
+orf00524 447826 446693 -2 7.74
+orf00526 448033 449772 +1 7.14
+orf00527 449913 451019 +3 4.50
+orf00528 451016 451588 +2 9.35
+orf00529 451614 453596 +3 7.48
+orf00531 453568 453690 +1 1.04
+orf00532 453889 455973 +1 7.41
+orf00533 456229 456993 +1 9.04
+orf00534 458402 457416 -3 6.92
+orf00535 459600 458434 -1 8.08
+orf00537 461072 459846 -3 7.71
+orf00539 462190 461081 -2 7.30
+orf00540 463165 462356 -2 8.67
+orf00541 463980 463153 -1 8.30
+orf00542 464576 463977 -3 5.74
+orf00543 464969 465433 +2 4.18
+orf00544 465447 465821 +3 8.07
+orf00545 465827 466330 +2 5.33
+orf00546 466432 467793 +1 5.95
+orf00547 468008 469285 +2 6.61
+orf00548 469346 471169 +2 4.90
+orf00549 471276 474203 +3 6.42
+orf00550 474333 479597 +3 7.89
+orf00552 479783 485086 +2 7.34
+orf00553 485639 486469 +2 5.79
+orf00554 486466 487176 +1 5.65
+orf00556 488992 487964 -2 4.54
+orf00557 489312 489061 -1 2.94
+orf00558 489666 489343 -1 4.46
+orf00559 490216 490917 +1 3.41
+orf00560 491102 491263 +2 3.38
+orf00561 491122 491006 -2 2.82
+orf00562 491280 491441 +3 2.98
+orf00563 491454 491939 +3 8.90
+orf00565 492072 493181 +3 7.21
+orf00566 493370 493720 +2 7.26
+orf00567 493898 495763 +2 6.67
+orf00568 495776 495991 +2 0.98
+orf00569 495960 497069 +3 6.20
+orf00570 497042 497863 +2 4.21
+orf00571 497835 498488 +3 2.91
+orf00572 499503 498514 -1 6.49
+orf00573 500391 499564 -1 8.34
+orf00574 500950 500360 -2 9.32
+orf00576 502443 500950 -1 7.08
+orf00578 502807 503478 +1 5.97
+orf00579 503581 504117 +1 6.86
+orf00580 505166 504114 -3 6.89
+orf00581 505500 505183 -1 7.33
+orf00582 507592 505508 -2 7.77
+orf00583 508107 507634 -1 9.31
+orf00584 508528 508157 -2 4.28
+orf00586 508788 509126 +3 3.73
+orf00587 509284 511218 +1 7.50
+orf00588 511332 511213 -1 1.17
+orf00589 511792 511340 -2 4.35
+orf00591 513566 511971 -3 5.26
+orf00593 514045 513779 -2 0.49
+orf00594 514382 514606 +2 0.10
+orf00595 516123 514603 -1 7.41
+orf00597 516946 516395 -2 8.62
+orf00598 519105 517351 -1 6.79
+orf00599 523425 519223 -1 4.99
+orf00601 524177 523845 -3 2.44
+orf00602 524640 525401 +3 6.46
+orf00603 525407 526324 +2 4.95
+orf00604 526321 526971 +1 9.03
+orf00605 526968 527618 +3 5.16
+orf00606 527633 529324 +2 7.97
+orf00607 530696 529362 -3 8.52
+orf00608 530908 533925 +1 7.21
+orf00609 534693 533977 -1 7.84
+orf00610 535372 534878 -2 5.37
+orf00611 536382 535378 -1 7.90
+orf00612 536566 536414 -2 0.77
+orf00613 536832 537065 +3 6.01
+orf00615 537135 538112 +3 5.58
+orf00616 538102 538761 +1 6.42
+orf00617 538770 539573 +3 5.28
+orf00618 540199 539525 -2 3.65
+orf00619 540427 540933 +1 3.85
+orf00621 541227 541556 +3 6.30
+orf00622 541534 542592 +1 8.05
+orf00623 542635 543795 +1 6.85
+orf00625 544022 544558 +2 5.58
+orf00626 544528 545259 +1 8.14
+orf00628 545858 545256 -3 6.48
+orf00629 546033 545917 -1 3.36
+orf00630 546083 546265 +2 4.51
+orf00631 546421 547116 +1 6.66
+orf00633 548304 547375 -1 6.75
+orf00634 548392 550764 +1 8.14
+orf00636 550761 551726 +3 2.94
+orf00638 551745 552257 +3 1.88
+orf00640 553990 552254 -2 6.92
+orf00641 555470 553992 -3 7.19
+orf00642 557494 555452 -2 7.11
+orf00644 557933 557793 -3 12.31
+orf00646 558881 558150 -3 6.53
+orf00647 559027 558866 -2 6.01
+orf00648 559696 559043 -2 7.44
+orf00649 559933 560055 +1 2.63
+orf00650 560078 560278 +2 1.90
+orf00651 560341 560529 +1 6.11
+orf00653 561506 560508 -3 6.43
+orf00654 562607 561663 -3 8.01
+orf00655 563414 562629 -3 7.46
+orf00656 564032 563460 -3 8.82
+orf00657 564763 564029 -2 8.41
+orf00659 566177 564852 -3 6.59
+orf00660 566370 566621 +3 2.57
+orf00661 568025 566631 -3 7.96
+orf00662 568630 568022 -2 8.11
+orf00663 571224 568624 -1 7.86
+orf00664 572236 571241 -2 7.70
+orf00666 573997 572375 -2 6.05
+orf00667 574679 574209 -3 4.51
+orf00668 575118 574972 -1 0.19
+orf00670 575229 576647 +3 7.62
+orf00672 576941 578773 +2 7.07
+orf00673 578763 579464 +3 8.09
+orf00674 579991 579521 -2 8.78
+orf00675 580708 580106 -2 8.73
+orf00676 581240 580728 -3 9.15
+orf00677 581902 581348 -2 8.23
+orf00679 583055 582189 -3 8.75
+orf00680 584070 583066 -1 6.76
+orf00681 584482 584114 -2 3.35
+orf00682 585621 584548 -1 6.12
+orf00683 586100 585702 -3 7.00
+orf00684 586490 586122 -3 3.52
+orf00685 587919 586546 -1 5.90
+orf00686 588376 587942 -2 7.85
+orf00687 588866 588369 -3 8.05
+orf00688 589252 588881 -2 6.46
+orf00689 589825 589274 -2 8.97
+orf00690 590254 589853 -2 6.51
+orf00692 590814 590272 -1 5.24
+orf00693 591112 590816 -2 4.33
+orf00694 591532 591164 -2 2.95
+orf00695 591800 591549 -3 1.65
+orf00696 592011 591793 -1 7.91
+orf00697 592423 592013 -2 6.16
+orf00698 593136 592462 -1 7.78
+orf00699 593481 593146 -1 1.08
+orf00700 593703 593500 -1 1.00
+orf00701 594626 593772 -3 5.63
+orf00702 594985 594650 -2 3.13
+orf00703 595669 595001 -2 7.25
+orf00704 596343 595678 -1 6.36
+orf00706 596809 596672 -2 4.25
+orf00707 597674 596778 -3 3.73
+orf00708 598790 597840 -3 8.00
+orf00709 599622 598780 -1 6.77
+orf00710 600080 599634 -3 8.66
+orf00711 600952 600092 -2 6.00
+orf00712 602633 601053 -3 3.44
+orf00713 603227 602745 -3 4.20
+orf00714 604115 603363 -3 6.80
+orf00715 604508 604119 -3 5.94
+orf00716 605287 604571 -2 8.28
+orf00717 606009 606317 +3 6.66
+orf00718 606822 606367 -1 7.60
+orf00720 607542 606838 -1 9.79
+orf00721 609410 607707 -3 7.33
+orf00723 610722 609436 -1 5.16
+orf00724 610710 610970 +3 3.06
+orf00725 611158 612528 +1 5.22
+orf00727 612626 616255 +2 7.49
+orf00729 617353 616484 -2 8.16
+orf00730 617442 618461 +3 8.09
+orf00731 618486 619070 +3 6.07
+orf00732 619057 619497 +1 5.19
+orf00733 619929 619504 -1 6.19
+orf00735 620076 621068 +3 4.16
+orf00737 621279 621398 +3 4.92
+orf00738 621569 621369 -3 2.20
+orf00739 621594 621890 +3 7.39
+orf00740 621887 622111 +2 3.61
+orf00741 622120 622992 +1 7.46
+orf00742 623142 624377 +3 5.77
+orf00743 624552 628151 +3 7.26
+orf00744 628329 628808 +3 5.63
+orf00745 628891 629004 +1 2.46
+orf00746 629017 630414 +1 7.42
+orf00747 630411 631346 +3 6.44
+orf00748 631450 632430 +1 8.24
+orf00749 632431 633267 +1 8.07
+orf00750 633352 633546 +1 0.77
+orf00751 633543 634214 +3 8.81
+orf00753 634227 635147 +3 7.40
+orf00754 635192 635443 +2 5.17
+orf00755 635450 636319 +2 5.62
+orf00756 636840 636397 -1 5.48
+orf00757 637878 636931 -1 8.60
+orf00758 638453 637929 -3 2.90
+orf00759 638866 638438 -2 2.29
+orf00761 639251 638877 -3 6.46
+orf00763 640443 639268 -1 5.01
+orf00764 641936 640455 -3 5.13
+orf00765 644226 641944 -1 8.08
+orf00766 645456 644227 -1 8.09
+orf00767 646654 645584 -2 6.87
+orf00768 648395 646665 -3 8.01
+orf00770 648690 649388 +3 6.20
+orf00771 649405 649764 +1 7.42
+orf00772 649801 651264 +1 9.43
+orf00773 651295 652614 +1 7.88
+orf00774 653606 652692 -3 5.11
+orf00775 653905 655887 +1 7.54
+orf00776 656338 657105 +1 9.31
+orf00777 657110 657901 +2 6.55
+orf00778 657867 658418 +3 4.41
+orf00779 658617 659657 +3 7.64
+orf00780 659682 661688 +3 8.79
+orf00781 661850 663124 +2 8.48
+orf00782 663251 665203 +2 7.50
+orf00783 665328 667136 +3 8.10
+orf00784 670015 667151 -2 7.93
+orf00786 670771 670112 -2 9.14
+orf00787 672768 671047 -1 5.08
+orf00788 673283 672765 -3 4.18
+orf00789 673551 673360 -1 3.69
+orf00791 674564 673773 -3 9.14
+orf00793 676727 674649 -3 5.56
+orf00796 676880 677578 +2 4.23
+orf00797 677575 677982 +1 8.05
+orf00798 677985 678692 +3 6.85
+orf00800 678692 679987 +2 4.98
+orf00801 679984 680550 +1 5.55
+orf00802 680540 681142 +2 5.94
+orf00803 681213 681605 +3 2.01
+orf00804 682189 681602 -2 9.82
+orf00806 683927 682398 -3 7.62
+orf00807 685305 684076 -1 6.95
+orf00808 685412 685537 +2 1.09
+orf00809 685503 686132 +3 6.63
+orf00811 686258 686106 -3 7.77
+orf00812 687019 686330 -2 6.07
+orf00813 687100 689004 +1 6.49
+orf00814 689008 690318 +1 8.76
+orf00816 691139 690426 -3 9.03
+orf00817 691849 691118 -2 4.20
+orf00818 692300 691821 -3 5.39
+orf00819 693649 692297 -2 7.59
+orf00820 694020 693646 -1 5.47
+orf00821 695754 694039 -1 9.66
+orf00822 697054 695903 -2 7.97
+orf00824 697629 697937 +3 3.30
+orf00826 698170 698970 +1 2.94
+orf00827 701659 699026 -2 6.84
+orf00828 701774 704290 +2 6.59
+orf00829 706852 704354 -2 7.74
+orf00831 709023 707080 -1 8.94
+orf00832 710453 709131 -3 6.87
+orf00833 710495 710638 +2 0.31
+orf00835 712282 710672 -2 5.56
+orf00836 712455 713321 +3 5.91
+orf00838 714047 713418 -3 7.67
+orf00840 714371 714246 -3 0.59
+orf00842 714431 715414 +2 7.66
+orf00846 716361 715486 -1 8.07
+orf00847 717034 716417 -2 7.41
+orf00849 717770 717087 -3 7.12
+orf00850 717949 718203 +1 6.72
+orf00851 719973 718384 -1 7.94
+orf00853 720135 720022 -1 1.59
+orf00854 720367 721287 +1 6.25
+orf00855 722710 721313 -2 8.79
+orf00857 723166 722732 -2 8.14
+orf00858 723280 725427 +1 10.37
+orf00859 725615 726817 +2 5.89
+orf00860 726792 727559 +3 4.37
+orf00861 727592 727801 +2 6.38
+orf00862 730897 727817 -2 8.09
+orf00865 733724 730899 -3 5.10
+orf00866 733926 735605 +3 4.50
+orf00867 736553 736437 -3 1.82
+orf00868 736440 735625 -1 7.96
+orf00869 736821 736672 -1 0.80
+orf00870 737335 739908 +1 8.36
+orf00871 740942 739938 -3 7.41
+orf00872 741318 742697 +3 6.15
+orf00873 742697 743275 +2 4.75
+orf00874 743323 744540 +1 4.37
+orf00875 744543 745079 +3 8.19
+orf00877 745340 746398 +2 8.28
+orf00880 746579 746424 -3 0.95
+orf00881 746705 748510 +2 6.55
+orf00882 748507 749997 +1 7.96
+orf00883 750063 750242 +3 4.20
+orf00884 751062 750343 -1 5.79
+orf00886 751559 751071 -3 2.89
+orf00887 752365 751556 -2 5.06
+orf00888 752650 752790 +1 2.52
+orf00889 752850 753143 +3 3.14
+orf00890 753143 753460 +2 1.20
+orf00892 753544 754548 +1 5.50
+orf00893 754647 754883 +3 7.17
+orf00895 756494 755022 -3 6.86
+orf00896 758326 756509 -2 8.08
+orf00897 758670 759713 +3 5.96
+orf00899 760331 760185 -3 3.67
+orf00900 760425 760826 +3 7.99
+orf00901 760830 763319 +3 8.56
+orf00902 763372 763614 +1 8.33
+orf00903 763773 763892 +3 2.60
+orf00904 763911 764360 +3 10.69
+orf00905 764380 765051 +1 6.05
+orf00906 765122 766381 +2 8.79
+orf00907 766404 766910 +3 9.69
+orf00908 766914 767765 +3 9.82
+orf00910 767775 768896 +3 10.08
+orf00911 769031 770503 +2 5.87
+orf00912 770509 773265 +1 6.53
+orf00913 774648 773578 -1 5.90
+orf00914 775170 775325 +3 0.26
+orf00915 775159 774638 -2 3.98
+orf00916 776069 775530 -3 7.10
+orf00917 776803 776066 -2 5.31
+orf00919 777664 776816 -2 8.43
+orf00920 778422 777661 -1 9.45
+orf00921 780060 778879 -1 6.39
+orf00923 780668 783910 +2 8.39
+orf00925 783974 784981 +2 8.12
+orf00927 785324 786775 +2 5.70
+orf00928 786778 787545 +1 8.58
+orf00929 787549 788736 +1 7.27
+orf00930 788729 789934 +2 8.96
+orf00931 790070 790915 +2 8.95
+orf00932 791737 790907 -2 6.10
+orf00933 792695 791730 -3 8.73
+orf00934 792856 793530 +1 6.71
+orf00935 793533 794813 +3 6.16
+orf00936 794941 796152 +1 8.93
+orf00938 796412 797383 +2 7.12
+orf00939 797434 798630 +1 8.01
+orf00940 798683 799894 +2 5.39
+orf00941 800526 799891 -1 4.05
+orf00942 801867 800533 -1 8.71
+orf00943 802134 803039 +3 4.99
+orf00944 803146 804429 +1 7.55
+orf00945 804494 804613 +2 0.20
+orf00946 804688 807597 +1 7.37
+orf00947 808236 807691 -1 4.42
+orf00948 809767 808295 -2 7.11
+orf00949 811115 809883 -3 8.01
+orf00950 812389 811130 -2 8.99
+orf00951 812959 812399 -2 8.88
+orf00952 814505 813177 -3 9.63
+orf00954 814743 814627 -1 5.05
+orf00955 814862 818353 +2 7.51
+orf00956 818358 819458 +3 6.93
+orf00957 819455 821254 +2 7.09
+orf00958 821366 823669 +2 7.05
+orf00959 823696 824868 +1 6.36
+orf00960 825916 824894 -2 5.28
+orf00961 827053 826049 -2 6.79
+orf00962 828417 827050 -1 6.70
+orf00963 828794 828429 -3 1.62
+orf00965 830091 828787 -1 9.13
+orf00966 830677 830165 -2 7.37
+orf00968 831698 830694 -3 7.89
+orf00970 832753 831971 -2 6.56
+orf00971 833904 832750 -1 5.39
+orf00972 834539 833859 -3 5.85
+orf00973 834836 835561 +2 6.89
+orf00974 835680 836204 +3 8.93
+orf00975 836231 836785 +2 5.84
+orf00976 837931 836792 -2 4.23
+orf00978 840002 838023 -3 8.31
+orf00979 840772 840026 -2 6.33
+orf00980 842095 840809 -2 5.70
+orf00981 842137 843264 +1 4.41
+orf00982 843374 844648 +2 2.97
+orf00983 844617 845090 +3 5.94
+orf00984 846454 845141 -2 6.50
+orf00985 846872 847537 +2 7.30
+orf00986 847642 849009 +1 7.09
+orf00988 849067 849519 +1 8.46
+orf00990 850187 849528 -3 5.44
+orf00991 850972 850184 -2 4.82
+orf00993 853375 850976 -2 6.66
+orf00994 853929 853717 -1 2.12
+orf00995 858450 857821 -1 2.92
+orf00997 860910 859615 -1 7.59
+orf00999 861397 861053 -2 9.83
+orf01000 862692 861502 -1 4.94
+orf01002 862710 863306 +3 5.22
+orf01003 863728 866118 +1 7.37
+orf01004 867387 866113 -1 8.16
+orf01005 868757 867384 -3 6.65
+orf01006 869740 868730 -2 6.60
+orf01007 872992 869753 -2 8.09
+orf01008 875742 872968 -1 7.71
+orf01009 875975 875745 -3 0.90
+orf01010 880496 879867 -3 2.92
+orf01012 881422 883422 +1 8.08
+orf01014 884288 883419 -3 6.99
+orf01015 884508 885080 +3 6.86
+orf01016 885320 885096 -3 6.06
+orf01017 886239 885367 -1 6.24
+orf01019 887837 886320 -3 8.25
+orf01020 888306 889658 +3 8.55
+orf01021 889804 890814 +1 6.95
+orf01022 890826 892076 +3 8.92
+orf01023 892073 892810 +2 9.07
+orf01024 892826 893983 +2 4.01
+orf01025 894099 894950 +3 6.34
+orf01026 894955 897366 +1 6.05
+orf01028 897822 897403 -1 9.24
+orf01029 898597 897980 -2 8.58
+orf01031 898940 899272 +2 8.41
+orf01032 899276 900295 +2 7.39
+orf01033 901352 900300 -3 7.25
+orf01035 901442 903130 +2 9.90
+orf01036 903584 903943 +2 4.87
+orf01037 903954 905210 +3 6.95
+orf01038 905207 905659 +2 8.12
+orf01041 906369 905740 -1 7.39
+orf01042 906617 907657 +2 7.91
+orf01043 908687 907623 -3 6.02
+orf01044 908950 909711 +1 4.88
+orf01045 909719 911332 +2 6.82
+orf01047 914688 912427 -1 3.12
+orf01049 915332 914643 -3 2.93
+orf01050 915488 915982 +2 8.55
+orf01052 916020 916133 +3 4.28
+orf01053 916214 917794 +2 8.78
+orf01054 919284 917791 -1 6.97
+orf01056 919539 920588 +3 6.33
+orf01057 920875 920666 -2 1.02
+orf01058 921178 921041 -2 0.32
+orf01059 921356 921487 +2 2.97
+orf01060 921655 921792 +1 2.35
+orf01061 921816 922121 +3 4.60
+orf01062 922601 922155 -3 8.03
+orf01065 922962 922777 -1 4.53
+orf01066 923307 923801 +3 10.00
+orf01067 925642 923846 -2 6.62
+orf01069 928108 925667 -2 6.36
+orf01071 928463 930250 +2 8.05
+orf01072 930344 930622 +2 6.92
+orf01073 930688 930801 +1 3.08
+orf01074 930758 931240 +2 6.03
+orf01077 934333 931322 -2 6.77
+orf01078 934706 934557 -3 0.90
+orf01080 934854 935402 +3 3.53
+orf01081 936833 935409 -3 6.39
+orf01082 937061 936927 -3 1.90
+orf01083 937410 937967 +3 8.07
+orf01084 937975 938514 +1 6.52
+orf01085 938652 938990 +3 4.50
+orf01086 939007 939252 +1 3.78
+orf01088 939273 939776 +3 8.86
+orf01089 939789 940706 +3 6.51
+orf01090 940879 940676 -2 7.87
+orf01092 942361 941009 -2 4.39
+orf01093 945410 942540 -3 7.75
+orf01094 946462 945467 -2 7.04
+orf01095 948013 946475 -2 5.50
+orf01097 948460 948888 +1 5.60
+orf01099 949350 950315 +3 5.58
+orf01100 950536 955131 +1 9.72
+orf01102 955429 955247 -2 1.59
+orf01103 955443 956168 +3 10.30
+orf01104 956599 956252 -2 5.14
+orf01105 956934 956572 -1 2.00
+orf01107 957180 958556 +3 8.76
+orf01108 958567 960387 +1 7.85
+orf01109 960463 961686 +1 7.64
+orf01110 961729 961884 +1 3.12
+orf01112 961868 963061 +2 7.21
+orf01113 963103 963819 +1 3.93
+orf01115 964771 963917 -2 8.93
+orf01116 964841 966001 +2 8.57
+orf01117 966016 966891 +1 5.76
+orf01118 967030 968523 +1 8.70
+orf01119 968723 971647 +2 7.84
+orf01120 972944 971661 -3 7.98
+orf01122 973739 972948 -3 6.18
+orf01123 974375 977518 +2 7.17
+orf01124 977556 978596 +3 6.13
+orf01125 978856 979530 +1 5.74
+orf01126 979715 980230 +2 6.52
+orf01127 981102 980227 -1 8.25
+orf01129 981732 981235 -1 7.36
+orf01131 982256 982699 +2 4.86
+orf01133 982923 983294 +3 4.69
+orf01134 983301 984329 +3 7.38
+orf01135 984336 984452 +3 1.00
+orf01136 984639 986615 +3 7.79
+orf01137 987712 986612 -2 6.68
+orf01138 988779 987715 -1 5.53
+orf01140 988877 989842 +2 7.53
+orf01142 990003 992744 +3 9.05
+orf01143 995046 992959 -1 8.31
+orf01144 995344 995075 -2 0.82
+orf01145 995570 996061 +2 9.28
+orf01146 996093 996311 +3 6.67
+orf01147 996987 996283 -1 4.26
+orf01148 997640 997122 -3 5.58
+orf01150 998144 997656 -3 4.90
+orf01151 998669 998190 -3 5.06
+orf01152 998988 999176 +3 5.53
+orf01154 999195 1000412 +3 6.22
+orf01155 1001244 1000369 -1 3.27
+orf01156 1001916 1001302 -1 8.73
+orf01157 1002620 1001934 -3 3.77
+orf01158 1004497 1002728 -2 6.46
+orf01159 1004550 1005941 +3 8.41
+orf01160 1007722 1006022 -2 5.41
+orf01161 1008641 1009747 +2 5.70
+orf01163 1009837 1011666 +1 6.84
+orf01164 1011761 1012684 +2 7.16
+orf01166 1014217 1012736 -2 7.30
+orf01168 1015749 1014229 -1 4.67
+orf01169 1016294 1015776 -3 5.79
+orf01171 1017742 1016294 -2 5.55
+orf01172 1018301 1018606 +2 4.22
+orf01173 1019505 1018603 -1 6.41
+orf01177 1019790 1020743 +3 5.54
+orf01178 1020758 1022974 +2 7.60
+orf01179 1023485 1022985 -3 6.22
+orf01182 1025471 1024215 -3 6.89
+orf01184 1028542 1025648 -2 5.93
+orf01185 1031649 1028545 -1 4.67
+orf01186 1031814 1034855 +3 6.98
+orf01189 1034877 1037936 +3 3.74
+orf01191 1038393 1038653 +3 2.43
+orf01192 1038701 1038835 +2 4.42
+orf01193 1041631 1039010 -2 7.01
+orf01194 1041632 1041775 +2 0.25
diff --git a/inst/sequences/ecolicgpe5.fasta b/inst/sequences/ecolicgpe5.fasta
new file mode 100644
index 0000000..78a2776
--- /dev/null
+++ b/inst/sequences/ecolicgpe5.fasta
@@ -0,0 +1,6 @@
+>ECOLICG.PE5
+gtgaaaaagatgcaatctatcgtactcgcactttccctggttctggtcgctcccatggca
+gcacaggctgcggaaattacgttagtcccgtcagtaaaattacagataggcgatcgtgat
+aatcgtggctattactgggatggaggtcactggcgcgaccacggctggtggaaacaacat
+tatgaatggcgaggcaatcgctggcacctacacggaccgccgccaccgccgcgccaccat
+aagaaagctcctcatgatcatcacggcggtcatggtccaggcaaacatcaccgctaa
diff --git a/inst/sequences/gopher.fasta b/inst/sequences/gopher.fasta
new file mode 100644
index 0000000..cf5f8de
--- /dev/null
+++ b/inst/sequences/gopher.fasta
@@ -0,0 +1,56 @@
+>gi|548223|gb|L32683.1|PPGCYTOXIA Geomys breviceps mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTATATCTTGATCCTGCCAGGATTTGGAATAATTTCACATATTGTTACTTACTATTCCGGAAAA
+AAAGAGCCTTTTGGCTATATAGGTATAGTTTGAGCTATAATATCAATCGGATTTTTAGGCTTTATCGTGT
+GAGCCCACCATATATTTACAGTAGGTATGGATGTAGACACACGAGCTTACTTTACATCTGCTACTATAAT
+TATCGCCATCCCAACTGGAGTGAAAGTATTTAGCTGATTAGCCACTTTACACGGAGGTAATATTAAATGG
+TCACCTGCTATATTGTGAGCGCTAGGTTTTATTTTCCTTTTCACTATCGGCGGATTAACTGGAATCGTCC
+TGTCCAACTCATCACTAGACATTGTACTG
+>gi|548197|gb|L32686.1|OGOCYTOXIA Orthogeomys cavator mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTATATCTTAATTCTCCCAGGCTTCGGAATGATTTCTCATATTGTCACTTACTACTCAGGTAAA
+AAAGAACCATTTGATTATATAGGCGTGGTATGAGCTATAATATCCATCGGATTCCTAGGTTTGATAGTAT
+GAGCCCACCATATATTCACAGTAGGAATAGACGTAGACACACGAGCTTATTTCACATCCGCTACTATAAT
+TATTGCTATTCCCACCGGAGTAAAAGTATTCAGTTGACTGGCTACCATGAACGGAGGTAATATTAAATGA
+TCTCCTGCCATACTATGAGCTTTAGGCTTTATCTTCCTATTCACAATTGGTGGCCTAACCGGCATTGTAT
+TATCAAATTCATCCTTAGATATTATTCTA
+>gi|548199|gb|L32687.1|OGOCYTOXIB Orthogeomys cherriei mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTACATCTTAATCCTCCCAGGCTTCGAAATAATCTCTCATATTGTCACTTATTATTCAGGTAAA
+AAAGAACCCTTTGGCTATATAGGTATGGTATGAGCTATAATATCAATTGGTTTCCTAGGTTTAATGGTAT
+GAGCCCACCATATATTTACAGTAGGAATAGACGTAGATACACGAGCCTACTTTACATCCGCTACTATAAT
+TATTGCTATTCCTACCGGAGTAAAAGTATTCAGTTGACTAGCTACCATGAACGGAGGCAATATTAAATGA
+TCCCCTGCCATATTATGAGCTTTAGGTTTTATTTTCCTATTTACAATTGGTGGCCTAACCGGCATTGTAT
+TATCAAACTCATCCTTAGATATTGTCCTA
+>gi|548201|gb|L32691.1|OGOCYTOXIC Orthogeomys underwoodi mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTATATCTTGATCCTCCCAGGATTCGGAATGATTTCTCATATTGTCACCTACTATTCAGGTAAA
+AAAGAACCCTTTGATTATATAGGCATGGTATGAGCTATAATATCTATTGGTTTCCTAGGTTTTATAGTAT
+GAGCCCACCATATATTCACAGTAGGGATAGACGTAGATACACGAGCTTACTTTACATCCGCTACTATGAT
+TATCGCTATCCCTACCGGAGTAAAAGTGTTCAGTTGACTAGCTACCATGAACGGAGGTAATATTAAATGA
+TCTCCTGCCATATTATGAGCCTTAGGTTTTATTTTCCTGTTCACAATTGGTGGACTAACAGGCATTGTAT
+TATCTAATTCATCCTTAGACATTATTCTA
+>gi|548203|gb|L32692.1|OGOCYTOXID Orthogeomys hispidus mitochondrial cytochrome oxidase I gene, partial cds
+TGAGGTTTATATCTTAATCCTCCCAGGCTTCGGTATAATTTCTCATATCGTCACTTATTACTCAGGCAAA
+AAAGAACCATTTGGATATATAGGCATGGTATGAGCCATAATATCTATTGGTTTCTTAGGTTTCATGGTAT
+GAGCCCACCATATATTCACAGTAGGAATAGACGTAGACACACGAGCTTATTTTACATCTGCTACTATAAT
+TATCGCTATTCCTACCGGGGTAAAAGTATTCAGTTGACTGGCTACACTGCATGGAGGTAATATTAAATGA
+TCTCCTGCCATATTATGAGCTTTAGGTTTTATTTTCCTATTTACTATCGGTGGTCTAACGGGCATTGTCC
+TATCAAATTCATCTTTGGATATCGTTCTA
+>gi|548229|gb|L32693.1|PPGCYTOXID Geomys bursarius mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTACATCTTAATCCTACCCGGATTCGGAATAATTTCCCATATTGTTACCTATTACTCAGGAAAA
+AAAGAGCCTTTTGGCTACATAGGCATAGTTTGAGCCATGATATCAATTGGATTCCTAGGCTTTATTGTTT
+GAGCCCATCATATATTTACAGTAGGTATGGACGTAGATACCCGAGCCTATTTTACATCTGCAACTATAAT
+CATCGCTATCCCAACAGGAGTAAAAGTATTCAGCTGATTAGCTACTTTACACGGGGGTAATATTAAATGA
+TCACCTGCTATACTGTGAGCATTAGGCTTTATTTTCCTTTTCACTATTGGCGGGTTAACTGGAATTGTCC
+TGTCCAACTCATCATTAGACATTGTTCTA
+>gi|548231|gb|L32694.1|PPGCYTOXIE Geomys bursarius mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTATATCTTAATTCTACCTGGATTCGGAATAATTTCACACATTGTTACTTATTATTCAGGAAAA
+AAAGAACCTTTTGGCTACATAGGCATAGTTTGAGCTATGATATCAATTGGATTCCTAGGCTTTATTGTAT
+GAGCCCATCATATATTTACAGTAGGTATGGATGTAGACACCCGAGCCTATTTTACATCTGCAACTATAAT
+CATTGCTATCCCAACAGGAGTAAAAGTGTTTAGCTGACTAGCTACTTTACACGGAGGTAATATTAAATGA
+TCTCCTGCTATACTGTGAGCATTAGGCTTTATTTTCCTTTTCACTATTGGCGGATTAACTGGAATCGTCC
+TATTCAACTCATCATTAGATATTGTATTA
+>gi|548205|gb|L32696.1|OGOCYTOXIE Orthogeomys heterodus mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTACATCTTAATCCTCCCAGGCTTCGGAATGATTTCTCATATTGTCACTTATTATTCAGGTAAA
+AAAGAACCCTTTGGCTATATAGGCATGGTATGAGCTATAATATCAATTGGTTTCCTAGGTTTTATGGTAT
+GAGCCCACCATATATTTACAGTAGGGATAGACGTAGATACACGAGCCTACTTTACATCCGCTACTATAAT
+TATTGCTATTCCTACAGGAGTAAAAGTATTCAGTTGACTGGCTACCATGAACGGAGGTAATATTAAATGA
+TCCCCTGCCATATTATGAGCTTTAGGTTTTATTTTCCTATTTACAATTGGGGGCCTAACCGGCATTGTAC
+TATCCAACTCATCATTGGATATTGTTCTA
diff --git a/inst/sequences/gopher.names b/inst/sequences/gopher.names
new file mode 100644
index 0000000..41d9fdb
--- /dev/null
+++ b/inst/sequences/gopher.names
@@ -0,0 +1,8 @@
+G.brevicep
+O.cavator
+O.cherriei
+O.underwoo
+O.hispidus
+G.burs1
+G.burs2
+O.heterodu
diff --git a/inst/sequences/hannah.txt b/inst/sequences/hannah.txt
new file mode 100644
index 0000000..9cad061
--- /dev/null
+++ b/inst/sequences/hannah.txt
@@ -0,0 +1,1784 @@
+AGI Hydrophilicity Glycine
+At2g19570 -0.10 0.07
+At2g45290 -0.25 0.09
+At4g29570 -0.05 0.07
+At4g29580 -0.10 0.06
+At4g29600 -0.14 0.06
+At5g28050 -0.11 0.08
+At3g23570 -0.26 0.06
+At3g23600 -0.09 0.08
+At5g64250 -0.01 0.10
+At3g30775 -0.21 0.06
+At1g30100 -0.34 0.07
+At1g78390 -0.34 0.07
+At3g14440 -0.30 0.08
+At3g24220 -0.23 0.08
+At4g18350 -0.29 0.07
+At5g23050 -0.10 0.08
+At5g36880 -0.18 0.09
+At4g34880 0.08 0.08
+At5g07360 -0.19 0.06
+At1g09780 -0.27 0.10
+At1g13440 -0.14 0.08
+At1g16300 -0.13 0.08
+At1g22170 -0.38 0.05
+At1g42970 0.00 0.08
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At1g79530 -0.14 0.08
+At1g79550 0.13 0.10
+At1g80460 -0.04 0.10
+At2g21170 -0.09 0.10
+At2g36580 0.02 0.06
+At2g40690 0.13 0.06
+At2g41540 -0.11 0.07
+At3g04050 -0.05 0.06
+At3g04120 -0.14 0.08
+At3g08590 -0.26 0.10
+At3g10370 -0.14 0.08
+At3g12780 0.15 0.10
+At3g22960 -0.11 0.07
+At3g25960 0.04 0.07
+At3g26650 -0.02 0.09
+At3g43290 -0.69 0.09
+At3g49160 -0.17 0.07
+At3g55440 0.08 0.09
+At3g55650 0.03 0.07
+At3g55810 0.06 0.07
+At3g56840 -0.19 0.08
+At4g26390 -0.02 0.07
+At5g04120 -0.34 0.08
+At5g08570 0.03 0.07
+At5g52920 -0.20 0.07
+At5g56350 -0.02 0.07
+At5g63680 -0.02 0.07
+At1g17290 -0.24 0.07
+At1g23310 -0.18 0.09
+At1g70580 -0.19 0.08
+At1g17290 -0.24 0.07
+At1g23310 -0.18 0.09
+At1g70580 -0.19 0.08
+At4g34880 0.08 0.08
+At5g07360 -0.19 0.06
+At1g48470 -0.48 0.10
+At1g66200 -0.42 0.11
+At3g17820 -0.46 0.12
+At3g53170 -0.13 0.06
+At3g53180 -0.13 0.07
+At5g16570 -0.43 0.11
+At5g37600 -0.42 0.11
+At5g53460 -0.29 0.09
+At1g25460 -0.17 0.06
+At2g38240 -0.41 0.08
+At4g22870 -0.26 0.05
+At4g22880 -0.39 0.06
+At4g27250 -0.19 0.05
+At4g33360 -0.03 0.08
+At4g35420 -0.13 0.06
+At5g42800 -0.24 0.06
+At1g75330 -0.15 0.06
+At4g24830 -0.21 0.08
+At5g10920 -0.25 0.06
+At5g46180 -0.18 0.08
+At1g29900 -0.10 0.07
+At1g75330 -0.15 0.06
+At1g80600 0.11 0.09
+At2g19940 -0.14 0.07
+At3g27740 -0.30 0.09
+At3g57560 0.09 0.09
+At4g24830 -0.21 0.08
+At4g37670 -0.09 0.10
+At5g10920 -0.25 0.06
+At4g08870 -0.23 0.10
+At5g46180 -0.18 0.08
+At3g30775 -0.21 0.06
+At4g08870 -0.23 0.10
+At5g52810 -0.12 0.07
+At4g08870 -0.23 0.10
+At5g14800 0.25 0.10
+At5g46180 -0.18 0.08
+At1g23820 -0.15 0.07
+At1g70310 -0.10 0.08
+At2g16500 -0.08 0.10
+At4g34710 -0.09 0.09
+At5g19530 -0.31 0.06
+At5g53120 -0.07 0.08
+At1g67070 -0.25 0.07
+At2g39770 0.03 0.09
+At2g45790 -0.38 0.08
+At3g02570 -0.29 0.06
+At3g55590 0.10 0.09
+At4g24620 -0.17 0.08
+At5g42740 -0.11 0.07
+At5g10240 -0.38 0.07
+At5g65010 -0.36 0.07
+At1g62800 -0.09 0.07
+At1g77670 -0.09 0.06
+At1g80360 -0.09 0.09
+At2g13810 -0.24 0.08
+At2g22250 0.06 0.07
+At2g30970 -0.32 0.08
+At3g16150 0.05 0.12
+At4g31990 -0.17 0.08
+At5g08100 -0.01 0.10
+At5g11520 -0.11 0.09
+At5g19550 -0.12 0.08
+At1g62800 -0.09 0.07
+At1g77670 -0.09 0.06
+At1g80360 -0.09 0.09
+At2g13810 -0.24 0.08
+At2g22250 0.06 0.07
+At2g30970 -0.32 0.08
+At4g31990 -0.17 0.08
+At5g11520 -0.11 0.09
+At5g19550 -0.12 0.08
+At2g22330 -0.16 0.05
+At3g44300 -0.15 0.09
+At3g44310 -0.19 0.09
+At3g44320 -0.17 0.09
+At4g28680 -0.24 0.08
+At4g32540 -0.27 0.10
+At4g39950 -0.19 0.05
+At5g20960 -0.10 0.08
+At1g74920 -0.01 0.08
+At3g48170 -0.03 0.08
+At4g29890 -0.29 0.08
+At1g01390 0.02 0.06
+At1g01420 -0.05 0.07
+At2g18570 0.01 0.07
+At2g22930 -0.02 0.07
+At2g29710 0.13 0.06
+At2g29730 0.07 0.05
+At2g29740 0.04 0.05
+At2g29750 0.04 0.06
+At3g29630 -0.07 0.07
+At4g01070 -0.12 0.07
+At5g54010 -0.07 0.08
+At5g54060 -0.11 0.07
+At1g03630 -0.39 0.07
+At1g08520 -0.33 0.06
+At1g09940 -0.13 0.05
+At1g58290 -0.20 0.06
+At1g69740 -0.22 0.07
+At2g31250 -0.28 0.04
+At2g40490 -0.11 0.07
+At3g14930 -0.10 0.08
+At3g48730 -0.02 0.12
+At3g51820 0.26 0.09
+At4g01690 -0.13 0.11
+At4g18480 -0.25 0.07
+At4g25080 -0.09 0.07
+At4g27440 -0.24 0.07
+At5g08280 -0.14 0.08
+At5g13630 -0.28 0.06
+At5g14220 -0.30 0.08
+At5g26710 -0.40 0.06
+At5g53090 -0.05 0.06
+At5g53100 0.03 0.05
+At5g54190 -0.21 0.07
+At5g63570 0.00 0.12
+At5g64050 -0.26 0.08
+At1g09940 -0.13 0.05
+At1g58290 -0.20 0.06
+At1g69740 -0.22 0.07
+At2g31250 -0.28 0.04
+At2g40490 -0.11 0.07
+At3g14930 -0.10 0.08
+At3g48730 -0.02 0.12
+At4g01690 -0.13 0.11
+At5g08280 -0.14 0.08
+At5g14220 -0.30 0.08
+At5g26710 -0.40 0.06
+At5g63570 0.00 0.12
+At5g64050 -0.26 0.08
+At2g43360 -0.30 0.07
+At5g04620 0.10 0.07
+At5g57590 -0.07 0.07
+At1g50430 0.37 0.08
+At1g58440 0.10 0.08
+At2g07050 -0.24 0.08
+At2g22830 0.00 0.06
+At2g38050 0.12 0.07
+At3g02580 0.25 0.05
+At3g19820 -0.42 0.07
+At3g50660 -0.31 0.06
+At4g37760 0.14 0.08
+At5g05690 -0.09 0.06
+At5g24140 0.02 0.07
+At5g24150 0.00 0.07
+At5g24160 -0.04 0.07
+At5g38970 -0.27 0.07
+At5g42600 -0.30 0.07
+At5g48010 -0.34 0.07
+At1g32060 -0.33 0.06
+At1g43670 -0.15 0.09
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g71100 0.15 0.10
+At1g79550 0.13 0.10
+At2g01140 -0.26 0.08
+At2g01290 0.06 0.10
+At2g21170 -0.09 0.10
+At2g21330 -0.13 0.08
+At2g36460 -0.15 0.09
+At2g45290 -0.25 0.09
+At3g01850 0.06 0.08
+At3g04790 0.10 0.10
+At3g12780 0.15 0.10
+At3g52930 -0.22 0.09
+At3g54050 -0.22 0.09
+At3g55440 0.08 0.09
+At3g55800 -0.13 0.09
+At4g10750 -0.20 0.09
+At4g26520 -0.08 0.08
+At4g26530 -0.11 0.09
+At4g38970 -0.18 0.08
+At5g03690 -0.19 0.09
+At5g38410 -0.21 0.06
+At5g44520 0.02 0.08
+At5g64380 -0.24 0.09
+At3g26830 -0.12 0.06
+At1g78240 -0.36 0.07
+At2g28450 -0.45 0.06
+At2g38660 0.13 0.07
+At2g44160 -0.31 0.07
+At3g13440 -0.05 0.06
+At3g18000 -0.44 0.08
+At3g59970 -0.19 0.07
+At4g10760 -0.46 0.07
+At4g31790 -0.15 0.09
+At5g23050 -0.10 0.08
+At5g24840 -0.33 0.06
+At5g36880 -0.18 0.09
+At1g65520 0.13 0.08
+At4g14430 0.14 0.09
+At4g14440 0.13 0.08
+At1g65520 0.13 0.08
+At4g14430 0.14 0.09
+At4g14440 0.13 0.08
+At1g06820 -0.09 0.10
+At1g08550 -0.34 0.06
+At3g04870 -0.14 0.08
+At3g10230 -0.11 0.07
+At4g14210 -0.16 0.07
+At4g25700 -0.14 0.09
+At5g17230 -0.26 0.05
+At5g57030 -0.10 0.07
+At5g67030 -0.30 0.09
+At1g04710 -0.04 0.10
+At2g33150 -0.03 0.09
+At4g37470 0.18 0.07
+At5g48880 0.08 0.10
+At1g02730 -0.28 0.08
+At1g32180 -0.06 0.07
+At1g55850 0.02 0.08
+At2g21770 -0.19 0.07
+At2g25540 -0.17 0.07
+At2g32530 0.01 0.06
+At2g32540 0.00 0.07
+At2g32610 0.08 0.06
+At2g32620 0.06 0.07
+At2g33100 -0.22 0.08
+At3g03050 -0.21 0.07
+At3g56000 0.22 0.06
+At4g15290 0.08 0.06
+At4g15320 0.00 0.05
+At4g16590 0.09 0.06
+At4g23990 0.02 0.07
+At4g24000 0.04 0.06
+At4g24010 -0.01 0.06
+At4g31590 0.10 0.05
+At4g32410 -0.24 0.08
+At4g38190 -0.23 0.08
+At4g39350 -0.21 0.08
+At5g05170 -0.18 0.07
+At5g09870 -0.22 0.07
+At5g16190 0.16 0.05
+At5g16910 -0.22 0.08
+At5g49720 -0.42 0.09
+At5g64740 -0.21 0.08
+At1g48850 -0.33 0.09
+At1g48860 -0.02 0.08
+At2g21940 -0.40 0.07
+At2g45300 -0.03 0.08
+At3g26900 -0.10 0.05
+At4g33510 -0.35 0.08
+At4g39540 -0.43 0.07
+At4g39980 -0.31 0.07
+At5g66120 -0.02 0.09
+At1g78240 -0.36 0.07
+At2g28450 -0.45 0.06
+At3g13440 -0.05 0.06
+At3g18000 -0.44 0.08
+At4g10760 -0.46 0.07
+At4g31790 -0.15 0.09
+At5g24840 -0.33 0.06
+At1g30620 -0.28 0.08
+At1g64440 -0.22 0.10
+At1g67070 -0.25 0.07
+At1g74910 0.04 0.07
+At2g04650 -0.06 0.07
+At2g34850 -0.46 0.08
+At2g45790 -0.38 0.08
+At3g02570 -0.29 0.06
+At3g03250 -0.15 0.06
+At3g10700 -0.16 0.09
+At3g29360 -0.12 0.06
+At4g10960 -0.20 0.09
+At4g16130 -0.12 0.08
+At4g20460 -0.16 0.08
+At4g23920 -0.28 0.09
+At4g30570 0.02 0.08
+At5g18200 -0.36 0.06
+At5g44480 -0.25 0.08
+At2g45970 -0.10 0.06
+At1g23730 -0.26 0.05
+At1g58180 -0.38 0.06
+At1g70410 -0.20 0.06
+At2g28210 -0.61 0.07
+At3g01500 -0.03 0.09
+At4g20990 -0.49 0.07
+At4g33580 -0.39 0.06
+At5g04180 -0.47 0.06
+At5g14740 -0.16 0.06
+At5g56330 -0.76 0.05
+At1g55920 -0.11 0.09
+At2g17640 -0.04 0.11
+At2g43750 0.02 0.09
+At3g03630 -0.02 0.08
+At3g04940 0.03 0.10
+At3g13110 -0.22 0.07
+At3g22460 0.05 0.12
+At3g59760 -0.08 0.09
+At3g61440 -0.11 0.08
+At4g14880 0.09 0.11
+At5g28020 0.10 0.11
+At5g28030 0.06 0.11
+At5g56760 0.04 0.09
+At1g25410 -0.23 0.07
+At1g68460 -0.44 0.05
+At3g19160 -0.30 0.06
+At3g23630 -0.32 0.05
+At3g63110 -0.26 0.05
+At4g24650 -0.43 0.05
+At5g19040 -0.22 0.04
+At3g01850 0.06 0.08
+At4g30310 -0.02 0.09
+At5g48230 0.18 0.13
+At2g26230 -0.29 0.06
+At1g17410 -0.13 0.06
+At2g19570 -0.10 0.07
+At3g07800 -0.06 0.08
+At3g46940 -0.16 0.10
+At4g09320 -0.03 0.09
+At4g11010 -0.10 0.09
+At4g29570 -0.05 0.07
+At4g29580 -0.10 0.06
+At4g29600 -0.14 0.06
+At5g23070 -0.27 0.06
+At5g28050 -0.11 0.08
+At5g59440 -0.40 0.06
+At2g21790 -0.32 0.06
+At3g27060 -0.08 0.05
+At1g31070 -0.42 0.08
+At2g35020 -0.19 0.07
+At1g19920 -0.41 0.07
+At1g62180 -0.40 0.07
+At3g22890 -0.35 0.08
+At4g04610 -0.34 0.07
+At4g14680 -0.34 0.07
+At4g21990 -0.33 0.07
+At5g43780 -0.34 0.08
+At2g27860 -0.25 0.07
+At3g53520 -0.26 0.09
+At4g00560 -0.05 0.06
+At5g28840 -0.48 0.09
+At5g59290 -0.40 0.07
+At1g01120 -0.10 0.06
+At1g68530 -0.04 0.05
+At1g01480 -0.20 0.07
+At1g02500 -0.30 0.09
+At1g62380 -0.51 0.06
+At1g62960 -0.25 0.08
+At2g22810 -0.33 0.05
+At3g49700 -0.28 0.06
+At3g61510 -0.21 0.06
+At4g11280 -0.28 0.07
+At4g23340 -0.36 0.06
+At4g26200 -0.33 0.06
+At4g37770 -0.37 0.06
+At5g28360 -0.39 0.08
+At5g65800 -0.31 0.06
+At1g36050 -0.21 0.08
+At1g36160 -0.23 0.07
+At1g36180 -0.25 0.06
+At5g16390 -0.32 0.03
+At5g35360 -0.14 0.09
+At5g46290 -0.16 0.10
+At2g22230 -0.02 0.07
+At3g55310 0.20 0.07
+At5g10160 0.12 0.06
+At5g46290 -0.16 0.10
+At2g22230 -0.02 0.07
+At3g55310 0.20 0.07
+At5g10160 0.12 0.06
+At5g46290 -0.16 0.10
+At1g04710 -0.04 0.10
+At1g49430 -0.20 0.07
+At2g33150 -0.03 0.09
+At3g05970 -0.07 0.08
+At3g60510 -0.17 0.08
+At4g16210 -0.06 0.08
+At4g16800 0.07 0.09
+At4g31810 -0.19 0.07
+At5g27600 -0.14 0.08
+At5g43280 0.06 0.07
+At5g48880 0.08 0.10
+At1g01390 0.02 0.06
+At1g01420 -0.05 0.07
+At1g02050 -0.23 0.09
+At1g49390 -0.35 0.06
+At1g51680 0.02 0.06
+At1g53520 0.00 0.07
+At1g65060 -0.03 0.07
+At1g78550 -0.32 0.04
+At2g18570 0.01 0.07
+At2g22930 -0.02 0.07
+At2g29710 0.13 0.06
+At2g29730 0.07 0.05
+At2g29740 0.04 0.05
+At2g29750 0.04 0.06
+At2g44800 -0.16 0.04
+At3g19000 -0.60 0.05
+At3g19010 -0.61 0.04
+At3g21230 -0.03 0.06
+At3g21240 -0.01 0.06
+At3g29630 -0.07 0.07
+At3g48990 -0.11 0.07
+At3g50210 -0.29 0.10
+At3g51240 -0.48 0.07
+At3g55120 0.04 0.07
+At4g00040 -0.19 0.09
+At4g01070 -0.12 0.07
+At4g05160 0.00 0.08
+At4g10490 -0.37 0.03
+At4g16330 -0.39 0.06
+At4g19010 0.11 0.07
+At5g05270 -0.27 0.08
+At5g07990 -0.12 0.08
+At5g08640 -0.51 0.06
+At5g13930 -0.12 0.09
+At5g24530 -0.46 0.04
+At5g38120 -0.02 0.07
+At5g54010 -0.07 0.08
+At5g54060 -0.11 0.07
+At5g63380 -0.03 0.07
+At5g63580 -0.35 0.07
+At5g63590 -0.43 0.06
+At5g66220 -0.15 0.06
+At2g21550 -0.22 0.05
+At2g28880 -0.34 0.07
+At3g10160 -0.03 0.07
+At3g11750 -0.08 0.05
+At3g55630 -0.21 0.07
+At4g24380 -0.25 0.09
+At4g30000 -0.09 0.07
+At4g34570 -0.20 0.06
+At5g05980 -0.18 0.07
+At5g41480 -0.05 0.08
+At5g62980 0.16 0.07
+At1g22020 -0.37 0.08
+At2g21550 -0.22 0.05
+At2g38660 0.13 0.07
+At3g10160 -0.03 0.07
+At4g13890 -0.29 0.08
+At4g24380 -0.25 0.09
+At4g32520 -0.19 0.08
+At4g34570 -0.20 0.06
+At4g37930 -0.33 0.07
+At5g26780 -0.28 0.07
+At5g41480 -0.05 0.08
+At1g30620 -0.28 0.08
+At1g64440 -0.22 0.10
+At2g34850 -0.46 0.08
+At3g03250 -0.15 0.06
+At3g10700 -0.16 0.09
+At4g10960 -0.20 0.09
+At4g16130 -0.12 0.08
+At4g20460 -0.16 0.08
+At4g23920 -0.28 0.09
+At5g18200 -0.36 0.06
+At5g44480 -0.25 0.08
+At1g23190 -0.25 0.09
+At1g45130 -0.26 0.10
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At1g72990 -0.25 0.08
+At1g77410 -0.37 0.08
+At2g16730 -0.54 0.08
+At2g32810 -0.33 0.09
+At3g10700 -0.16 0.09
+At3g26380 -0.43 0.08
+At3g52840 -0.26 0.09
+At3g53080 -0.27 0.10
+At3g56310 -0.15 0.08
+At4g16130 -0.12 0.08
+At4g26140 -0.33 0.09
+At4g35010 -0.50 0.08
+At4g36360 -0.28 0.10
+At5g08370 -0.34 0.08
+At5g08380 -0.26 0.08
+At5g17530 -0.17 0.08
+At5g18200 -0.36 0.06
+At5g20710 -0.48 0.09
+At5g51820 -0.17 0.09
+At5g56870 -0.24 0.09
+At5g63800 -0.25 0.10
+At5g63810 -0.37 0.09
+At1g15550 -0.20 0.06
+At1g79460 -0.35 0.05
+At4g02780 -0.39 0.05
+At4g25420 -0.37 0.04
+At5g25900 -0.26 0.05
+At1g09780 -0.27 0.10
+At1g13440 -0.14 0.08
+At1g16300 -0.13 0.08
+At1g22170 -0.38 0.05
+At1g42970 0.00 0.08
+At1g43670 -0.15 0.09
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At1g79530 -0.14 0.08
+At1g79550 0.13 0.10
+At2g01140 -0.26 0.08
+At2g21330 -0.13 0.08
+At2g36460 -0.15 0.09
+At3g04120 -0.14 0.08
+At3g08590 -0.26 0.10
+At3g12780 0.15 0.10
+At3g26650 -0.02 0.09
+At3g43290 -0.69 0.09
+At3g52930 -0.22 0.09
+At3g54050 -0.22 0.09
+At4g00570 -0.09 0.09
+At4g10750 -0.20 0.09
+At4g24620 -0.17 0.08
+At4g26520 -0.08 0.08
+At4g26530 -0.11 0.09
+At4g38970 -0.18 0.08
+At5g03690 -0.19 0.09
+At5g04120 -0.34 0.08
+At5g42740 -0.11 0.07
+At5g64380 -0.24 0.09
+At1g23190 -0.25 0.09
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At3g03250 -0.15 0.06
+At5g17530 -0.17 0.08
+At5g51820 -0.17 0.09
+At5g57655 -0.40 0.09
+At1g16400 -0.30 0.07
+At1g16410 -0.29 0.06
+At4g03050 -0.33 0.08
+At4g03060 -0.39 0.07
+At4g13770 -0.27 0.06
+At2g22330 -0.16 0.05
+At4g31500 -0.15 0.06
+At4g39950 -0.19 0.05
+At1g65960 -0.21 0.06
+At2g02000 -0.26 0.06
+At2g02010 -0.27 0.06
+At3g17720 -0.03 0.07
+At5g17330 -0.27 0.05
+At1g64800 -1.06 0.03
+At2g20420 -0.06 0.09
+At4g26910 -0.27 0.06
+At5g23250 0.08 0.13
+At5g65750 -0.48 0.07
+At1g62800 -0.09 0.07
+At1g77670 -0.09 0.06
+At1g80360 -0.09 0.09
+At2g13810 -0.24 0.08
+At2g20420 -0.06 0.09
+At2g22250 0.06 0.07
+At2g30970 -0.32 0.08
+At2g47510 -0.17 0.08
+At4g31990 -0.17 0.08
+At5g11520 -0.11 0.09
+At5g19550 -0.12 0.08
+At5g23250 0.08 0.13
+At5g50950 -0.27 0.07
+At1g80600 0.11 0.09
+At2g19940 -0.14 0.07
+At3g57560 0.09 0.09
+At4g37670 -0.09 0.10
+At1g48470 -0.48 0.10
+At1g66200 -0.42 0.11
+At3g17820 -0.46 0.12
+At3g53170 -0.13 0.06
+At3g53180 -0.13 0.07
+At5g16570 -0.43 0.11
+At5g37600 -0.42 0.11
+At4g23100 -0.29 0.09
+At5g27380 -0.28 0.07
+At1g09780 -0.27 0.10
+At1g13440 -0.14 0.08
+At1g16300 -0.13 0.08
+At1g22170 -0.38 0.05
+At1g42970 0.00 0.08
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At1g79530 -0.14 0.08
+At1g79550 0.13 0.10
+At3g04120 -0.14 0.08
+At3g08590 -0.26 0.10
+At3g12780 0.15 0.10
+At3g26650 -0.02 0.09
+At3g43290 -0.69 0.09
+At5g04120 -0.34 0.08
+At1g74210 -0.37 0.05
+At1g80460 -0.04 0.10
+At2g40690 0.13 0.06
+At2g41540 -0.11 0.07
+At3g10370 -0.14 0.08
+At3g56840 -0.19 0.08
+At5g08030 -0.44 0.05
+At1g18640 -0.03 0.08
+At1g22020 -0.37 0.08
+At4g13890 -0.29 0.08
+At4g32520 -0.19 0.08
+At4g37930 -0.33 0.07
+At5g26780 -0.28 0.07
+At1g11860 -0.25 0.11
+At1g60990 -0.13 0.09
+At1g03310 -0.28 0.06
+At1g23190 -0.25 0.09
+At1g69830 -0.55 0.08
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At1g76130 -0.51 0.09
+At2g39930 -0.39 0.07
+At2g40840 -0.43 0.06
+At4g25000 -0.32 0.09
+At5g17530 -0.17 0.08
+At5g51820 -0.17 0.09
+At5g64860 -0.32 0.07
+At1g07110 -0.38 0.07
+At1g09780 -0.27 0.10
+At1g13440 -0.14 0.08
+At1g16300 -0.13 0.08
+At1g22170 -0.38 0.05
+At1g42970 0.00 0.08
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At1g79530 -0.14 0.08
+At1g79550 0.13 0.10
+At2g01140 -0.26 0.08
+At2g21170 -0.09 0.10
+At2g21330 -0.13 0.08
+At2g36460 -0.15 0.09
+At2g36580 0.02 0.06
+At3g04050 -0.05 0.06
+At3g04120 -0.14 0.08
+At3g08590 -0.26 0.10
+At3g12780 0.15 0.10
+At3g22960 -0.11 0.07
+At3g25960 0.04 0.07
+At3g26650 -0.02 0.09
+At3g43290 -0.69 0.09
+At3g49160 -0.17 0.07
+At3g52930 -0.22 0.09
+At3g55440 0.08 0.09
+At3g55650 0.03 0.07
+At3g55810 0.06 0.07
+At4g10750 -0.20 0.09
+At4g24620 -0.17 0.08
+At4g26390 -0.02 0.07
+At4g26520 -0.08 0.08
+At4g26530 -0.11 0.09
+At4g38970 -0.18 0.08
+At5g03690 -0.19 0.09
+At5g04120 -0.34 0.08
+At5g08570 0.03 0.07
+At5g42740 -0.11 0.07
+At5g52920 -0.20 0.07
+At5g56350 -0.02 0.07
+At5g63680 -0.02 0.07
+At1g07110 -0.38 0.07
+At1g09780 -0.27 0.10
+At1g12000 -0.20 0.10
+At1g13440 -0.14 0.08
+At1g16300 -0.13 0.08
+At1g22170 -0.38 0.05
+At1g42970 0.00 0.08
+At1g56190 0.17 0.10
+At1g58150 -0.69 0.13
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At1g79530 -0.14 0.08
+At1g79550 0.13 0.10
+At2g01140 -0.26 0.08
+At2g21170 -0.09 0.10
+At2g21330 -0.13 0.08
+At2g22480 -0.15 0.09
+At2g36460 -0.15 0.09
+At2g36580 0.02 0.06
+At3g04050 -0.05 0.06
+At3g04120 -0.14 0.08
+At3g08590 -0.26 0.10
+At3g12780 0.15 0.10
+At3g22960 -0.11 0.07
+At3g25960 0.04 0.07
+At3g26650 -0.02 0.09
+At3g43290 -0.69 0.09
+At3g49160 -0.17 0.07
+At3g52930 -0.22 0.09
+At3g55440 0.08 0.09
+At3g55650 0.03 0.07
+At3g55810 0.06 0.07
+At4g10750 -0.20 0.09
+At4g24620 -0.17 0.08
+At4g26390 -0.02 0.07
+At4g26520 -0.08 0.08
+At4g26530 -0.11 0.09
+At4g32840 -0.16 0.09
+At4g38970 -0.18 0.08
+At5g03690 -0.19 0.09
+At5g04120 -0.34 0.08
+At5g08570 0.03 0.07
+At5g42740 -0.11 0.07
+At5g47810 -0.18 0.09
+At5g52920 -0.20 0.07
+At5g56350 -0.02 0.07
+At5g63680 -0.02 0.07
+At2g11810 -0.31 0.07
+At3g11670 -0.48 0.05
+At4g00550 -0.34 0.05
+At4g31780 -0.15 0.09
+At4g33030 -0.28 0.06
+At5g01220 -0.12 0.06
+At5g20410 -0.29 0.06
+At3g11170 -0.31 0.06
+At3g15850 -0.05 0.06
+At4g30950 -0.07 0.04
+At5g05580 -0.32 0.06
+At2g42790 -0.21 0.07
+At2g44350 -0.22 0.07
+At3g21720 -0.36 0.07
+At3g58740 -0.24 0.08
+At3g58750 -0.17 0.07
+At3g60100 -0.15 0.08
+At4g00570 -0.09 0.09
+At4g26970 -0.25 0.08
+At4g35830 -0.16 0.09
+At5g03860 -0.36 0.08
+At1g31860 -0.44 0.04
+At1g58080 -0.04 0.07
+At1g71920 -0.16 0.06
+At2g36230 -0.13 0.11
+At4g14910 -0.33 0.07
+At5g10330 -0.15 0.06
+At5g63890 0.00 0.06
+At1g33320 -0.06 0.07
+At3g01120 0.11 0.07
+At3g57050 -0.11 0.06
+At1g02460 -0.38 0.09
+At1g02790 0.02 0.11
+At1g02810 -0.23 0.07
+At1g04630 -0.49 0.09
+At1g05650 0.06 0.10
+At1g05660 0.06 0.10
+At1g10640 -0.17 0.09
+At1g11370 0.04 0.08
+At1g11580 -0.19 0.06
+At1g14890 -0.20 0.04
+At1g17150 -0.16 0.09
+At1g19170 -0.20 0.10
+At1g21850 -0.21 0.08
+At1g21860 -0.17 0.08
+At1g23200 -0.32 0.06
+At1g23460 -0.20 0.09
+At1g43080 -0.29 0.08
+At1g44980 -0.10 0.05
+At1g53840 -0.22 0.06
+At1g56710 -0.05 0.07
+At1g60590 -0.26 0.07
+At1g65570 -0.03 0.10
+At1g69940 -0.13 0.10
+At1g70500 -0.18 0.08
+At1g75790 -0.07 0.08
+At1g76160 -0.24 0.09
+At1g80140 -0.03 0.11
+At1g80170 -0.19 0.08
+At2g15470 -0.25 0.09
+At2g19150 -0.12 0.10
+At2g21610 -0.22 0.07
+At2g23630 -0.13 0.08
+At2g23900 -0.02 0.08
+At2g26440 -0.26 0.08
+At2g26450 -0.44 0.06
+At2g33160 -0.29 0.08
+At2g36700 -0.22 0.09
+At2g36710 -0.35 0.09
+At2g40310 -0.26 0.08
+At2g41850 -0.27 0.08
+At2g43050 -0.29 0.08
+At2g43860 -0.08 0.08
+At2g43870 -0.01 0.10
+At2g43880 -0.16 0.10
+At2g43890 -0.07 0.10
+At2g45220 -0.22 0.08
+At2g46930 -0.19 0.07
+At2g47030 -0.23 0.10
+At2g47040 -0.22 0.09
+At2g47050 -0.04 0.06
+At2g47280 -0.15 0.11
+At2g47550 -0.24 0.08
+At2g47670 -0.04 0.02
+At3g05610 -0.24 0.07
+At3g05620 -0.30 0.08
+At3g06770 -0.09 0.10
+At3g06830 -0.18 0.08
+At3g07820 -0.15 0.10
+At3g07830 -0.10 0.10
+At3g07840 -0.09 0.10
+At3g07970 -0.33 0.07
+At3g10710 -0.23 0.07
+At3g10720 -0.25 0.09
+At3g14040 -0.01 0.11
+At3g14310 -0.27 0.08
+At3g15720 -0.08 0.10
+At3g17060 -0.25 0.08
+At3g24130 -0.03 0.10
+At3g26610 -0.22 0.09
+At3g27980 -0.16 0.07
+At3g29090 -0.29 0.09
+At3g43270 -0.12 0.08
+At3g47380 -0.04 0.04
+At3g47400 -0.27 0.08
+At3g47670 -0.20 0.05
+At3g49220 -0.18 0.08
+At3g57790 -0.15 0.08
+At3g59010 -0.29 0.08
+At3g59850 -0.09 0.10
+At3g61490 -0.08 0.09
+At3g62180 0.04 0.04
+At4g00190 -0.33 0.09
+At4g02300 -0.27 0.06
+At4g02330 -0.29 0.05
+At4g03930 -0.23 0.07
+At4g12390 -0.03 0.04
+At4g13760 -0.36 0.09
+At4g15980 -0.57 0.06
+At4g18180 -0.16 0.09
+At4g22010 -0.22 0.10
+At4g23820 -0.17 0.11
+At4g25250 -0.36 0.04
+At4g25260 -0.22 0.04
+At4g28090 -0.36 0.07
+At4g33220 -0.38 0.09
+At4g33230 -0.43 0.06
+At4g33440 -0.09 0.09
+At4g35670 -0.11 0.09
+At4g37160 -0.14 0.07
+At4g38420 -0.35 0.08
+At5g04960 -0.23 0.07
+At5g04970 -0.31 0.08
+At5g07410 -0.11 0.10
+At5g07420 -0.17 0.09
+At5g07430 -0.24 0.09
+At5g09760 -0.32 0.08
+At5g14650 -0.15 0.10
+At5g17200 -0.18 0.12
+At5g18990 -0.10 0.10
+At5g19730 -0.18 0.08
+At5g20860 -0.28 0.07
+At5g26810 -0.09 0.09
+At5g27870 -0.27 0.07
+At5g39910 -0.36 0.07
+At5g41870 -0.16 0.10
+At5g44830 -0.27 0.09
+At5g44840 -0.13 0.09
+At5g47500 -0.21 0.09
+At5g48140 -0.12 0.10
+At5g49180 -0.21 0.08
+At5g51480 -0.23 0.07
+At5g51490 -0.12 0.08
+At5g51500 -0.09 0.08
+At5g53370 -0.21 0.09
+At5g55590 -0.29 0.09
+At5g64640 -0.25 0.07
+At5g66920 -0.21 0.08
+At5g23010 -0.19 0.08
+At1g14810 -0.13 0.07
+At3g02020 0.04 0.06
+At5g13280 0.05 0.07
+At5g21060 0.09 0.10
+At2g31810 -0.07 0.06
+At3g23940 -0.10 0.11
+At3g48560 -0.13 0.08
+At3g58610 -0.07 0.09
+At4g11640 0.16 0.08
+At5g16290 -0.15 0.06
+At1g21400 -0.45 0.06
+At2g33150 -0.03 0.09
+At3g60510 -0.17 0.08
+At4g16210 -0.06 0.08
+At4g16800 0.07 0.09
+At4g31810 -0.19 0.07
+At5g43280 0.06 0.07
+At5g48880 0.08 0.10
+At2g02500 -0.11 0.05
+At5g62790 -0.09 0.07
+At1g09400 -0.42 0.08
+At1g13280 -0.34 0.09
+At1g17420 -0.45 0.05
+At1g17990 -0.37 0.09
+At1g19640 -0.27 0.07
+At1g55020 -0.45 0.06
+At1g72520 -0.47 0.05
+At2g44810 -0.29 0.07
+At3g22400 -0.48 0.05
+At5g42650 -0.25 0.07
+At1g45130 -0.26 0.10
+At1g72990 -0.25 0.08
+At1g77410 -0.37 0.08
+At2g16730 -0.54 0.08
+At2g32810 -0.33 0.09
+At3g52840 -0.26 0.09
+At3g53080 -0.27 0.10
+At4g26140 -0.33 0.09
+At4g35010 -0.50 0.08
+At4g36360 -0.28 0.10
+At5g20710 -0.48 0.09
+At5g56870 -0.24 0.09
+At5g63800 -0.25 0.10
+At5g63810 -0.37 0.09
+At1g45130 -0.26 0.10
+At1g72990 -0.25 0.08
+At1g77410 -0.37 0.08
+At2g16730 -0.54 0.08
+At2g32810 -0.33 0.09
+At3g52840 -0.26 0.09
+At3g53080 -0.27 0.10
+At4g26140 -0.33 0.09
+At4g35010 -0.50 0.08
+At4g36360 -0.28 0.10
+At5g20710 -0.48 0.09
+At5g56870 -0.24 0.09
+At5g63800 -0.25 0.10
+At5g63810 -0.37 0.09
+At1g23190 -0.25 0.09
+At1g30620 -0.28 0.08
+At1g45130 -0.26 0.10
+At1g64440 -0.22 0.10
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At1g72990 -0.25 0.08
+At1g77410 -0.37 0.08
+At2g16730 -0.54 0.08
+At2g32810 -0.33 0.09
+At2g34850 -0.46 0.08
+At3g03250 -0.15 0.06
+At3g10700 -0.16 0.09
+At3g52840 -0.26 0.09
+At3g53080 -0.27 0.10
+At4g10960 -0.20 0.09
+At4g16130 -0.12 0.08
+At4g20460 -0.16 0.08
+At4g23920 -0.28 0.09
+At4g26140 -0.33 0.09
+At4g35010 -0.50 0.08
+At4g36360 -0.28 0.10
+At5g17530 -0.17 0.08
+At5g20710 -0.48 0.09
+At5g44480 -0.25 0.08
+At5g51820 -0.17 0.09
+At5g56870 -0.24 0.09
+At5g63800 -0.25 0.10
+At5g63810 -0.37 0.09
+At1g79230 -0.31 0.06
+At1g18500 -0.24 0.07
+At1g31180 -0.12 0.09
+At1g74040 -0.21 0.08
+At1g80560 -0.06 0.09
+At2g43090 -0.16 0.08
+At2g43100 -0.12 0.09
+At3g58990 -0.09 0.08
+At4g13430 -0.18 0.09
+At5g03290 -0.07 0.08
+At5g14200 -0.08 0.09
+At5g23010 -0.19 0.08
+At5g23020 -0.27 0.07
+At1g03090 -0.30 0.08
+At1g21400 -0.45 0.06
+At2g26800 -0.04 0.08
+At4g34030 -0.14 0.11
+At1g15950 -0.28 0.06
+At1g51680 0.02 0.06
+At1g65060 -0.03 0.07
+At2g30490 -0.20 0.06
+At2g37040 -0.16 0.08
+At2g40890 -0.28 0.06
+At3g10340 -0.18 0.08
+At3g19450 0.14 0.10
+At3g21230 -0.03 0.06
+At3g21240 -0.01 0.06
+At3g48990 -0.11 0.07
+At3g53260 -0.18 0.08
+At4g05160 0.00 0.08
+At4g19010 0.11 0.07
+At4g24520 -0.32 0.07
+At4g26220 -0.22 0.07
+At4g30210 -0.23 0.06
+At4g34050 -0.35 0.06
+At4g36220 -0.15 0.06
+At5g04230 -0.16 0.07
+At5g38120 -0.02 0.07
+At5g63380 -0.03 0.07
+At2g42010 -0.56 0.08
+At2g44810 -0.29 0.07
+At3g08510 -0.49 0.06
+At3g15730 -0.41 0.08
+At3g16785 -0.46 0.06
+At3g45880 -0.31 0.06
+At4g35790 -0.41 0.06
+At4g37070 -0.15 0.07
+At5g58670 -0.50 0.06
+At1g13280 -0.34 0.09
+At1g17420 -0.45 0.05
+At1g55020 -0.45 0.06
+At1g72520 -0.47 0.05
+At3g22400 -0.48 0.05
+At3g45140 -0.48 0.06
+At4g15440 -0.08 0.08
+At5g42650 -0.25 0.07
+At4g20380 -0.11 0.06
+At4g21610 -0.64 0.05
+At1g14810 -0.13 0.07
+At2g45440 -0.17 0.09
+At3g02020 0.04 0.06
+At3g14390 -0.18 0.07
+At3g53450 -0.06 0.10
+At3g53580 -0.15 0.10
+At3g60880 -0.24 0.09
+At5g03270 -0.09 0.09
+At5g06300 -0.10 0.08
+At5g11880 -0.16 0.07
+At5g11950 -0.07 0.11
+At5g13280 0.05 0.07
+At5g26140 -0.17 0.08
+At5g52100 0.14 0.07
+At4g33150 -0.14 0.07
+At5g48230 0.18 0.13
+At5g48230 0.18 0.13
+At4g11570 -0.42 0.05
+At1g67070 -0.25 0.07
+At1g74910 0.04 0.07
+At2g04650 -0.06 0.07
+At2g45790 -0.38 0.08
+At3g02570 -0.29 0.06
+At4g30570 0.02 0.08
+At2g41490 0.61 0.06
+At3g01120 0.11 0.07
+At3g25900 -0.15 0.07
+At3g57050 -0.11 0.06
+At3g63250 -0.21 0.08
+At5g20980 -0.17 0.05
+At1g02500 -0.30 0.09
+At4g13610 -0.53 0.06
+At4g13940 -0.12 0.08
+At4g19020 -0.63 0.07
+At5g49160 -0.47 0.07
+At1g11840 -0.33 0.10
+At3g10850 -0.46 0.07
+At1g78510 -0.03 0.07
+At2g38700 -0.27 0.06
+At3g02780 -0.29 0.04
+At3g54250 -0.29 0.06
+At4g11820 -0.29 0.07
+At4g17190 -0.26 0.05
+At5g27450 0.08 0.08
+At5g47770 -0.23 0.05
+At5g48230 0.18 0.13
+At1g37130 -0.36 0.08
+At1g48470 -0.48 0.10
+At1g66200 -0.42 0.11
+At1g77760 -0.38 0.07
+At2g15620 -0.38 0.08
+At3g17820 -0.46 0.12
+At3g53170 -0.13 0.06
+At3g53180 -0.13 0.07
+At5g16570 -0.43 0.11
+At5g37600 -0.42 0.11
+At1g12230 -0.04 0.06
+At1g64190 -0.26 0.10
+At1g71100 0.15 0.10
+At2g01290 0.06 0.10
+At2g45290 -0.25 0.09
+At3g01850 0.06 0.08
+At3g02360 -0.29 0.10
+At3g04790 0.10 0.10
+At5g13420 -0.18 0.06
+At5g41670 -0.27 0.10
+At5g44520 0.02 0.08
+At1g63380 0.08 0.09
+At1g74030 -0.18 0.09
+At2g36580 0.02 0.06
+At3g01260 -1.16 0.07
+At3g04050 -0.05 0.06
+At3g17940 -0.40 0.11
+At3g22960 -0.11 0.07
+At3g25960 0.04 0.07
+At3g49160 -0.17 0.07
+At3g55650 0.03 0.07
+At3g55810 0.06 0.07
+At4g26390 -0.02 0.07
+At5g08570 0.03 0.07
+At5g15140 -1.02 0.10
+At5g52920 -0.20 0.07
+At5g56350 -0.02 0.07
+At5g63680 -0.02 0.07
+At4g08870 -0.23 0.10
+At5g52810 -0.12 0.07
+At1g16350 -0.12 0.10
+At1g17410 -0.13 0.06
+At1g27450 0.10 0.06
+At1g63660 -0.14 0.08
+At1g79470 -0.10 0.11
+At1g80050 0.22 0.08
+At2g37250 -0.28 0.10
+At2g39270 -0.27 0.08
+At2g41880 -0.29 0.10
+At3g01820 -0.30 0.07
+At3g57550 -0.32 0.10
+At3g57610 -0.12 0.11
+At4g04880 -0.09 0.05
+At4g09320 -0.03 0.09
+At4g11010 -0.10 0.09
+At4g12440 0.17 0.06
+At4g22570 0.01 0.09
+At5g11160 0.16 0.09
+At5g35170 -0.34 0.06
+At5g47840 -0.43 0.06
+At5g50370 -0.43 0.06
+At5g63400 -0.43 0.07
+At4g08870 -0.23 0.10
+At5g52810 -0.12 0.07
+At1g23820 -0.15 0.07
+At1g70310 -0.10 0.08
+At5g19530 -0.31 0.06
+At5g53120 -0.07 0.08
+At1g09420 -0.32 0.07
+At1g13700 -0.13 0.04
+At1g24280 -0.32 0.06
+At1g64190 -0.26 0.10
+At3g02360 -0.29 0.10
+At3g27300 -0.46 0.07
+At3g49360 -0.17 0.05
+At5g13110 -0.37 0.06
+At5g24400 -0.20 0.05
+At5g24410 -0.11 0.04
+At5g24420 -0.23 0.04
+At5g35790 -0.40 0.06
+At5g40760 -0.44 0.07
+At5g41670 -0.27 0.10
+At1g60440 -0.16 0.11
+At2g46110 0.06 0.08
+At3g18030 -0.03 0.06
+At3g61530 0.01 0.08
+At5g48840 -0.16 0.10
+At1g08250 -0.10 0.05
+At1g11790 -0.15 0.05
+At1g69370 -0.42 0.03
+At1g71920 -0.16 0.06
+At2g20610 -0.15 0.06
+At2g24850 -0.10 0.07
+At3g29200 -0.41 0.06
+At4g23590 -0.14 0.06
+At4g23600 -0.15 0.05
+At4g28420 -0.08 0.06
+At5g10330 -0.15 0.06
+At5g10870 -0.38 0.04
+At5g36160 0.12 0.06
+At5g53970 0.07 0.07
+At2g30490 -0.20 0.06
+At2g37040 -0.16 0.08
+At3g10340 -0.18 0.08
+At3g53260 -0.18 0.08
+At4g24520 -0.32 0.07
+At4g30210 -0.23 0.06
+At5g04230 -0.16 0.07
+At1g32200 -0.25 0.06
+At1g51260 0.29 0.04
+At1g75020 0.24 0.05
+At3g18850 0.15 0.04
+At1g15080 0.39 0.08
+At1g62430 0.39 0.06
+At1g68000 0.66 0.03
+At2g01180 0.10 0.08
+At2g39290 0.21 0.05
+At2g45150 0.39 0.11
+At3g55030 0.51 0.07
+At3g60620 0.32 0.09
+At4g22340 0.40 0.07
+At4g25970 -0.13 0.07
+At4g26770 0.05 0.06
+At4g38570 0.70 0.03
+At5g57190 -0.13 0.07
+At2g29980 -0.09 0.06
+At3g11170 -0.31 0.06
+At3g12120 -0.08 0.06
+At4g30950 -0.07 0.04
+At5g05580 -0.32 0.06
+At1g68010 -0.06 0.08
+At2g13360 0.03 0.08
+At2g35120 -0.33 0.07
+At2g35370 -0.25 0.05
+At3g14150 -0.04 0.07
+At3g14415 -0.02 0.07
+At4g18360 -0.01 0.07
+At5g06580 -0.15 0.07
+At5g38410 -0.21 0.06
+At1g51680 0.02 0.06
+At1g53520 0.00 0.07
+At1g65060 -0.03 0.07
+At2g30490 -0.20 0.06
+At2g37040 -0.16 0.08
+At3g10340 -0.18 0.08
+At3g21230 -0.03 0.06
+At3g21240 -0.01 0.06
+At3g48990 -0.11 0.07
+At3g53260 -0.18 0.08
+At3g55120 0.04 0.07
+At4g05160 0.00 0.08
+At4g19010 0.11 0.07
+At4g24520 -0.32 0.07
+At4g30210 -0.23 0.06
+At5g04230 -0.16 0.07
+At5g05270 -0.27 0.08
+At5g38120 -0.02 0.07
+At5g63380 -0.03 0.07
+At5g66220 -0.15 0.06
+At1g23820 -0.15 0.07
+At1g70310 -0.10 0.08
+At2g16500 -0.08 0.10
+At2g27450 -0.27 0.08
+At3g02470 -0.26 0.07
+At4g34710 -0.09 0.09
+At5g08170 -0.44 0.07
+At5g15950 -0.27 0.07
+At5g18930 -0.28 0.05
+At5g53120 -0.07 0.08
+At1g78510 -0.03 0.07
+At3g02780 -0.29 0.04
+At4g17190 -0.26 0.05
+At5g47770 -0.23 0.05
+At5g60500 0.07 0.06
+At5g60510 -0.03 0.07
+At3g14050 -0.30 0.05
+At4g02260 -0.22 0.06
+At1g61720 -0.16 0.06
+At5g14800 0.25 0.10
+At1g04710 -0.04 0.10
+At2g33150 -0.03 0.09
+At4g37470 0.18 0.07
+At5g48880 0.08 0.10
+At1g12230 -0.04 0.06
+At1g71100 0.15 0.10
+At2g01290 0.06 0.10
+At2g42910 0.08 0.05
+At2g44530 0.04 0.07
+At2g45290 -0.25 0.09
+At3g04790 0.10 0.10
+At5g13420 -0.18 0.06
+At5g44520 0.02 0.08
+At1g09830 -0.01 0.10
+At1g16350 -0.12 0.10
+At1g31220 -0.14 0.08
+At1g63660 -0.14 0.08
+At1g74260 -0.13 0.08
+At1g79470 -0.10 0.11
+At2g35040 -0.24 0.08
+At2g37690 -0.08 0.08
+At3g21110 -0.33 0.05
+At3g55010 0.02 0.12
+At3g57610 -0.12 0.11
+At4g17360 -0.18 0.05
+At4g18440 -0.16 0.05
+At4g34740 -0.10 0.08
+At4g38880 0.01 0.07
+At2g01350 0.03 0.07
+At5g14760 -0.09 0.07
+At2g17630 -0.22 0.07
+At3g19030 0.30 0.05
+At5g49970 -0.36 0.07
+At5g37850 -0.07 0.07
+At5g49970 -0.36 0.07
+At1g29900 -0.10 0.07
+At3g20330 -0.19 0.06
+At3g27740 -0.30 0.09
+At3g54470 0.07 0.08
+At4g22930 -0.12 0.06
+At5g23300 -0.10 0.11
+At1g17410 -0.13 0.06
+At1g26190 -0.40 0.05
+At1g30820 -0.12 0.09
+At1g55810 -0.19 0.07
+At1g73980 -0.32 0.06
+At2g19570 -0.10 0.07
+At2g34890 -0.19 0.08
+At3g10030 -0.55 0.08
+At3g12670 -0.09 0.10
+At3g27190 -0.14 0.07
+At3g53900 0.14 0.07
+At3g54470 0.07 0.08
+At3g60180 -0.43 0.06
+At4g02120 -0.05 0.08
+At4g09320 -0.03 0.09
+At4g11010 -0.10 0.09
+At4g20320 -0.23 0.09
+At4g26510 -0.13 0.07
+At4g29570 -0.05 0.07
+At4g29580 -0.10 0.06
+At4g29600 -0.14 0.06
+At5g28050 -0.11 0.08
+At1g03040 -0.40 0.10
+At1g34430 0.13 0.09
+At1g48030 -0.05 0.10
+At1g54220 -0.41 0.06
+At3g16950 -0.03 0.09
+At3g17240 -0.03 0.10
+At3g25860 0.03 0.06
+At3g52200 -0.25 0.06
+At4g25100 -0.35 0.06
+At4g35090 -0.57 0.05
+At3g01850 0.06 0.08
+At4g30310 -0.02 0.09
+At1g06730 0.02 0.08
+At1g17160 -0.05 0.08
+At1g19600 0.02 0.10
+At1g71100 0.15 0.10
+At2g01290 0.06 0.10
+At2g45290 -0.25 0.09
+At3g04790 0.10 0.10
+At5g44520 0.02 0.08
+At1g18870 -0.23 0.07
+At1g74710 -0.23 0.07
+At2g37040 -0.16 0.08
+At3g10340 -0.18 0.08
+At3g53260 -0.18 0.08
+At5g04230 -0.16 0.07
+At1g18640 -0.03 0.08
+At1g72190 -0.01 0.08
+At2g17630 -0.22 0.07
+At3g19030 0.30 0.05
+At4g34200 0.14 0.08
+At1g09780 -0.27 0.10
+At1g22020 -0.37 0.08
+At1g22170 -0.38 0.05
+At1g74030 -0.18 0.09
+At1g78040 -0.27 0.05
+At1g78050 -0.44 0.05
+At2g42790 -0.21 0.07
+At2g44350 -0.22 0.07
+At3g08590 -0.26 0.10
+At3g21720 -0.36 0.07
+At3g58740 -0.24 0.08
+At3g58750 -0.17 0.07
+At3g60100 -0.15 0.08
+At4g00570 -0.09 0.09
+At4g13890 -0.29 0.08
+At4g26970 -0.25 0.08
+At4g32520 -0.19 0.08
+At4g35830 -0.16 0.09
+At4g37930 -0.33 0.07
+At5g04120 -0.34 0.08
+At5g26780 -0.28 0.07
+At2g22990 -0.28 0.08
+At2g40890 -0.28 0.06
+At4g36220 -0.15 0.06
+At1g11720 -0.46 0.07
+At1g23190 -0.25 0.09
+At1g27680 -0.18 0.08
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At2g21590 -0.27 0.08
+At3g01180 -0.43 0.07
+At3g20440 -0.41 0.08
+At4g24620 -0.17 0.08
+At4g39210 -0.31 0.07
+At5g03650 -0.50 0.08
+At5g17530 -0.17 0.08
+At5g19220 -0.21 0.08
+At5g42740 -0.11 0.07
+At5g48300 -0.17 0.07
+At5g51820 -0.17 0.09
+At1g03310 -0.28 0.06
+At1g67490 -0.37 0.08
+At1g69830 -0.55 0.08
+At1g76130 -0.51 0.09
+At2g32290 -0.58 0.07
+At2g39930 -0.39 0.07
+At2g40840 -0.43 0.06
+At2g45880 -0.44 0.09
+At3g23920 -0.41 0.10
+At3g29320 -0.36 0.06
+At4g00490 -0.34 0.08
+At4g15210 -0.37 0.09
+At4g17090 -0.60 0.10
+At4g25000 -0.32 0.09
+At5g04360 -0.30 0.07
+At5g11720 -0.26 0.08
+At5g18670 -0.24 0.09
+At5g45300 -0.47 0.09
+At5g55700 -0.28 0.08
+At5g64860 -0.32 0.07
+At1g51680 0.02 0.06
+At1g65060 -0.03 0.07
+At2g30490 -0.20 0.06
+At2g37040 -0.16 0.08
+At2g40890 -0.28 0.06
+At2g45970 -0.10 0.06
+At3g10340 -0.18 0.08
+At3g21240 -0.01 0.06
+At3g53260 -0.18 0.08
+At4g24520 -0.32 0.07
+At4g26220 -0.22 0.07
+At4g30210 -0.23 0.06
+At4g34050 -0.35 0.06
+At5g04230 -0.16 0.07
+At1g03160 -0.13 0.05
+At1g04920 -0.48 0.07
+At1g06020 -0.11 0.08
+At1g23190 -0.25 0.09
+At1g43670 -0.15 0.09
+At1g50390 -0.18 0.06
+At1g66430 -0.11 0.07
+At1g69200 -0.58 0.06
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At1g73370 -0.39 0.07
+At2g31390 -0.06 0.09
+At3g03250 -0.15 0.06
+At3g43190 -0.30 0.06
+At3g54050 -0.22 0.09
+At3g54090 -0.50 0.06
+At3g59480 0.05 0.09
+At4g02280 -0.27 0.07
+At4g10120 -0.46 0.07
+At4g10260 -0.04 0.09
+At4g24620 -0.17 0.08
+At5g11110 -0.34 0.07
+At5g17530 -0.17 0.08
+At5g20280 -0.47 0.07
+At5g20830 -0.33 0.06
+At5g37180 -0.33 0.07
+At5g42740 -0.11 0.07
+At5g49190 -0.23 0.06
+At5g51820 -0.17 0.09
+At5g51830 0.00 0.09
+At5g64380 -0.24 0.09
+At1g03160 -0.13 0.05
+At1g06020 -0.11 0.08
+At1g12240 -0.29 0.08
+At1g22650 -0.22 0.07
+At1g23190 -0.25 0.09
+At1g35580 -0.23 0.07
+At1g47840 -0.12 0.09
+At1g50390 -0.18 0.06
+At1g50460 0.01 0.09
+At1g55120 -0.42 0.07
+At1g56560 -0.28 0.05
+At1g62660 -0.31 0.07
+At1g66430 -0.11 0.07
+At1g69200 -0.58 0.06
+At1g70730 -0.21 0.09
+At1g70820 -0.21 0.09
+At1g73370 -0.39 0.07
+At2g31390 -0.06 0.09
+At2g36190 -0.42 0.07
+At3g03250 -0.15 0.06
+At3g05820 -0.36 0.05
+At3g06500 -0.27 0.08
+At3g13784 -0.43 0.07
+At3g13790 -0.48 0.07
+At3g20040 0.04 0.08
+At3g43190 -0.30 0.06
+At3g52600 -0.52 0.08
+At3g54090 -0.50 0.06
+At3g59480 0.05 0.09
+At4g02280 -0.27 0.07
+At4g09510 -0.25 0.06
+At4g10260 -0.04 0.09
+At4g24620 -0.17 0.08
+At4g29130 0.05 0.09
+At4g34860 -0.26 0.06
+At4g37840 -0.10 0.08
+At5g17530 -0.17 0.08
+At5g20830 -0.33 0.06
+At5g22510 -0.19 0.06
+At5g37180 -0.33 0.07
+At5g42740 -0.11 0.07
+At5g49190 -0.23 0.06
+At5g51820 -0.17 0.09
+At5g51830 0.00 0.09
+At1g19920 -0.41 0.07
+At1g55920 -0.11 0.09
+At1g62180 -0.40 0.07
+At2g17640 -0.04 0.11
+At2g43750 0.02 0.09
+At3g01120 0.11 0.07
+At3g03630 -0.02 0.08
+At3g04940 0.03 0.10
+At3g13110 -0.22 0.07
+At3g22460 0.05 0.12
+At3g22890 -0.35 0.08
+At3g57050 -0.11 0.06
+At3g59760 -0.08 0.09
+At3g61440 -0.11 0.08
+At4g04610 -0.34 0.07
+At4g14680 -0.34 0.07
+At4g14880 0.09 0.11
+At4g21990 -0.33 0.07
+At5g04590 -0.38 0.08
+At5g20980 -0.17 0.05
+At5g28020 0.10 0.11
+At5g28030 0.06 0.11
+At5g43780 -0.34 0.08
+At5g56760 0.04 0.09
+At3g01910 -0.32 0.08
+At1g64190 -0.26 0.10
+At1g71100 0.15 0.10
+At2g01290 0.06 0.10
+At2g16790 -0.14 0.06
+At3g02360 -0.29 0.10
+At3g04790 0.10 0.10
+At5g41670 -0.27 0.10
+At5g44520 0.02 0.08
+At1g65930 -0.33 0.08
+At2g18450 -0.31 0.10
+At2g20420 -0.06 0.09
+At2g42790 -0.21 0.07
+At2g44350 -0.22 0.07
+At2g47510 -0.17 0.08
+At3g27380 -0.44 0.06
+At3g58740 -0.24 0.08
+At3g58750 -0.17 0.07
+At3g60100 -0.15 0.08
+At4g00570 -0.09 0.09
+At4g26970 -0.25 0.08
+At4g35830 -0.16 0.09
+At5g09600 -0.06 0.07
+At5g14590 -0.23 0.07
+At5g23250 0.08 0.13
+At5g40650 -0.45 0.08
+At5g50950 -0.27 0.07
+At5g55070 -0.28 0.07
+At5g65750 -0.48 0.07
+At5g66760 -0.36 0.10
+At1g22940 0.03 0.08
+At4g35460 -0.02 0.11
+At1g72810 -0.20 0.06
+At4g29840 -0.15 0.07
+At1g11840 -0.33 0.10
+At3g10850 -0.46 0.07
+At1g06410 -0.27 0.07
+At1g16980 -0.27 0.07
+At1g22210 -0.39 0.06
+At1g23870 -0.24 0.06
+At1g35910 -0.48 0.07
+At1g60140 -0.23 0.07
+At1g68020 -0.21 0.07
+At1g70290 -0.31 0.06
+At1g78580 -0.44 0.06
+At2g18700 -0.36 0.06
+At2g22190 -0.44 0.06
+At4g12430 -0.44 0.05
+At4g17770 -0.23 0.06
+At4g22590 -0.44 0.05
+At4g27550 -0.20 0.07
+At5g10100 -0.44 0.05
+At5g51460 -0.33 0.05
+At5g65140 -0.41 0.06
+At4g24040 -0.36 0.05
+At1g15080 0.39 0.08
+At1g51260 0.29 0.04
+At1g75020 0.24 0.05
+At2g01180 0.10 0.08
+At2g19450 0.24 0.07
+At3g18850 0.15 0.04
+At1g06250 -0.42 0.06
+At1g06800 -0.54 0.07
+At1g09390 0.05 0.10
+At1g10740 -0.26 0.09
+At1g18460 -0.23 0.07
+At1g27690 -0.42 0.09
+At1g28640 -0.06 0.08
+At1g28670 -0.08 0.09
+At1g30370 -0.47 0.06
+At1g53940 -0.22 0.09
+At1g53990 -0.29 0.08
+At1g73920 -0.29 0.06
+At2g27360 -0.12 0.08
+At2g30550 -0.43 0.06
+At2g31100 -0.50 0.06
+At2g31690 -0.34 0.05
+At2g42690 -0.33 0.08
+At3g44520 -0.35 0.10
+At3g48460 -0.13 0.07
+At3g55190 -0.28 0.07
+At4g16820 -0.53 0.06
+At4g18550 -0.47 0.08
+At5g03980 -0.16 0.08
+At5g14180 0.00 0.09
+At5g18630 -0.16 0.06
+At5g18640 -0.01 0.06
+At1g11870 -0.30 0.06
+At1g14610 -0.43 0.06
+At1g17960 -0.41 0.06
+At1g27160 -0.08 0.02
+At1g29870 -0.40 0.06
+At1g29880 -0.32 0.05
+At1g50200 -0.33 0.08
+At1g66530 -0.23 0.07
+At1g68420 -0.20 0.02
+At1g70980 -0.24 0.07
+At2g31170 -0.41 0.06
+At2g33840 -0.42 0.05
+At2g40660 -0.38 0.06
+At3g02660 -0.14 0.08
+At3g02760 -0.38 0.07
+At3g04600 -0.38 0.06
+At3g07420 -0.31 0.06
+At3g11710 -0.55 0.06
+At3g13490 -0.41 0.06
+At3g44740 -0.10 0.05
+At3g46100 -0.14 0.08
+At3g48110 -0.17 0.06
+At3g55400 -0.31 0.06
+At3g56300 -0.39 0.07
+At4g04350 -0.49 0.06
+At4g10320 -0.31 0.06
+At4g13780 -0.42 0.06
+At4g17300 -0.28 0.07
+At4g26300 -0.25 0.06
+At4g26870 -0.39 0.06
+At4g31180 -0.37 0.06
+At4g33760 -0.20 0.06
+At4g39280 -0.54 0.07
+At5g16710 -0.27 0.06
+At5g19720 -0.65 0.05
+At5g26710 -0.40 0.06
+At5g26830 -0.42 0.06
+At5g27470 -0.54 0.06
+At5g38750 -0.32 0.05
+At5g38830 -0.44 0.06
+At5g49030 -0.36 0.07
+At5g52520 -0.35 0.06
+At5g64050 -0.26 0.08
+At1g29410 -0.32 0.07
+At1g51110 -0.22 0.06
+At1g51570 -0.23 0.06
+At2g04400 -0.21 0.07
+At2g29690 -0.33 0.05
+At3g54640 0.07 0.08
+At3g55870 -0.30 0.05
+At3g57880 -0.27 0.05
+At3g61720 -0.08 0.04
+At4g02610 0.01 0.07
+At4g27070 -0.25 0.09
+At5g05730 -0.31 0.06
+At5g06850 -0.20 0.05
+At5g12970 -0.18 0.05
+At5g17990 0.17 0.09
+At5g38530 -0.25 0.08
+At5g48220 -0.24 0.08
+At5g54810 -0.22 0.10
+At5g48230 0.18 0.13
+At1g06570 -0.28 0.09
+At1g12050 -0.23 0.09
+At2g20610 -0.15 0.06
+At2g24850 -0.10 0.07
+At2g47510 -0.17 0.08
+At4g00570 -0.09 0.09
+At4g23590 -0.14 0.06
+At4g23600 -0.15 0.05
+At4g28420 -0.08 0.06
+At5g36160 0.12 0.06
+At5g50950 -0.27 0.07
+At5g53970 0.07 0.07
+At5g54080 -0.23 0.08
+At1g15690 0.61 0.11
+At1g30620 -0.28 0.08
+At1g64440 -0.22 0.10
+At2g18230 -0.34 0.05
+At2g34850 -0.46 0.08
+At2g46860 -0.41 0.03
+At3g03250 -0.15 0.06
+At3g53620 -0.26 0.04
+At4g10960 -0.20 0.09
+At4g20460 -0.16 0.08
+At4g23920 -0.28 0.09
+At5g09650 -0.40 0.07
+At5g44480 -0.25 0.08
+At1g31070 -0.42 0.08
+At2g35020 -0.19 0.07
+At5g18070 -0.16 0.09
+At1g16350 -0.12 0.10
+At1g79470 -0.10 0.11
+At2g26230 -0.29 0.06
+At4g34890 -0.10 0.08
+At4g34900 -0.09 0.08
+At1g67550 -0.10 0.10
+At2g31810 -0.07 0.06
+At3g23940 -0.10 0.11
+At3g48560 -0.13 0.08
+At3g58610 -0.07 0.09
+At5g16290 -0.15 0.06
+At1g21400 -0.45 0.06
+At3g25530 0.07 0.10
+At3g60510 -0.17 0.08
+At4g16210 -0.06 0.08
+At4g16800 0.07 0.09
+At4g20930 -0.13 0.09
+At4g31810 -0.19 0.07
+At5g43280 0.06 0.07
+At1g06570 -0.28 0.09
+At1g64970 -0.18 0.07
+At2g18950 0.55 0.05
+At3g63410 -0.29 0.08
+At4g32770 -0.43 0.09
+At1g08550 -0.34 0.06
+At5g67030 -0.30 0.09
+At2g21370 -0.25 0.09
+At5g49650 -0.22 0.08
+At5g57655 -0.40 0.09
+At1g43670 -0.15 0.09
+At1g48430 0.09 0.10
+At2g01140 -0.26 0.08
+At2g21330 -0.13 0.08
+At2g36460 -0.15 0.09
+At2g45290 -0.25 0.09
+At3g17770 0.09 0.10
+At3g52930 -0.22 0.09
+At3g54050 -0.22 0.09
+At4g10750 -0.20 0.09
+At4g26520 -0.08 0.08
+At4g26530 -0.11 0.09
+At4g38970 -0.18 0.08
+At5g03690 -0.19 0.09
+At5g64380 -0.24 0.09
\ No newline at end of file
diff --git a/inst/sequences/humanMito.fasta b/inst/sequences/humanMito.fasta
new file mode 100644
index 0000000..ed95491
--- /dev/null
+++ b/inst/sequences/humanMito.fasta
@@ -0,0 +1,283 @@
+>gi|17981852|ref|NC_001807.4| Homo sapiens mitochondrion, complete genome
+;
+; This file was send by Stefanie Hartmann on Wed, 16 Jan 2008 09:54:03 +0100
+; in order to reproduce a behaviour of the count() function.
+;
+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTT
+CGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTC
+GCAGTATCTGTCTTTGATTCCTGCCTCATTCTATTATTTATCGCACCTACGTTCAATATT
+ACAGGCGAACATACCTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATA
+ACAATTGAATGTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA
+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAA
+AAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTAGGCGGTATGC
+ACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTA
+ATCTCATCAATACAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCA
+TACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA
+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGG
+TCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGA
+GTTCACCCTCTAAATCACCACGATCAAAAGGGACAAGCATCAAGCACGCAGCAATGCAGC
+TCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAAT
+AAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC
+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCC
+CCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAG
+ACTACGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTA
+GATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAG
+AACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA
+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATA
+TACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAA
+AGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCC
+AGAAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTG
+AGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC
+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAA
+GTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACA
+CAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACC
+TAGCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAAT
+AAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA
+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATG
+AATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAG
+CTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTA
+TAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTT
+AGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG
+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATT
+TAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACC
+CACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAATCTA
+TCACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAA
+GCCTGCGTCAGATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA
+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAA
+AAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTTTACCAAAAACATCACCTCTA
+GCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACC
+CTAACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCT
+CCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGG
+CGGGCATGACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAG
+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGC
+GACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCG
+AACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAA
+CAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT
+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCT
+ACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTC
+CCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATG
+ATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGG
+CAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAATTCCTCTTCT
+TAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGC
+ATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAA
+CGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAA
+AGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGC
+TCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT
+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTG
+ATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAGTAGCCCA
+AACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGG
+CTCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATC
+ATGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTT
+CGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGG
+CCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATTATTATAATAAACACCCTCAC
+CACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATA
+TTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCC
+CCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCT
+AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAAC
+CTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACC
+CCCTTATTTCTAGGACTATGAGAATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGC
+CACCTATCACACCCCATCCTAAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCC
+GAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC
+TCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTTTTACCTGA
+GTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCT
+CGTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTT
+CTAATAGCTATCCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACTACC
+AATCAATACTCATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCC
+TTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGGCCTGCTTCTT
+CTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAAATCTCTCCCTCACTAAAC
+GTAAGCCTTCTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGAGGTGGATTA
+AACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATA
+ATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATC
+CTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTA
+TCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCC
+CTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTC
+ACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTC
+TACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAAC
+GTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATC
+GCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATAATCTTATAGAAATTT
+AGGTTAAATACAGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTG
+CAACAGCTAAGGACTGCAAAACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTA
+ATTAAGCTAAGCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGC
+TAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGA
+GAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCT
+CGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTC
+AGCCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCAC
+AAAGACATTGGAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCT
+CTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCAC
+ATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCC
+ATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATAATCGGTGCCCCCGATATG
+GCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTG
+CTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTA
+GCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCTA
+GCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAA
+CCCCCTGCCATAACCCAATACCAAACGCCCCTCTTCGTCTGATCCGTCCTAATCACAGCA
+GTCCTACTTCTCCTATCTCTCCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGAC
+CGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAA
+CACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCAGGCTTCGGA
+ATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGT
+ATGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATA
+TTTACAGTAGGAATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATC
+GCTATCCCCACCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATG
+AAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCACCGTAGGTGGC
+CTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTACTACACGACACGTACTAC
+GTTGTAGCTCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGC
+TTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATC
+CATTTCACTATCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGC
+CTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCACATGAAACATC
+CTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAATATTAATAATTTTCATGATT
+TGAGAAGCCTTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTG
+GAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAA
+TCTAGACAAAAAAGGAAGGAATCGAACCCCCCAAAGCTGGTTTCAAGCCAACCCCATGGC
+CTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAAAGTTAAA
+TTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGACG
+CTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTT
+TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTA
+ATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCA
+TCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACG
+ATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCG
+ACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCG
+ACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTA
+TAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAA
+CAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTAT
+ACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAG
+AATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCT
+ACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAA
+GAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCA
+TAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAA
+ACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAG
+AACCAAAATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACC
+CGCCGCAGTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCAT
+CAACAACCGACTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGAT
+AGCCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTT
+TATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACT
+ATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATTATAGGCTTTCG
+CTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTAT
+CCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGT
+ACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCAC
+CCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCT
+ACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGT
+AAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTA
+AAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA
+GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACTAACCAACACA
+CTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAAGCACATACCAAGGCCACCAC
+ACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTTATTACCTCAGAAGTT
+TTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAA
+CTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC
+CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTA
+ATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTC
+TATTTTACCCTCCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGC
+ATCTACGGCTCAACATTTTTTGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGC
+TCAACTTTCCTCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT
+CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTT
+CTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGTACCGTTAAC
+TTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTA
+ATAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTC
+AACGGCTACATAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC
+CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTA
+GAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACTAACCTGCCACTAATA
+GTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAGTCTGGCCTATGAGTGACTA
+CAAAAAGGATTAGACTGAGCCGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGAC
+TCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA
+GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTA
+CTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAAC
+ACCCACTCCCTCTTAGCCAATATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAA
+GCAGCGGTGGGCCTAGCCCTACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTA
+CATAACCTAAACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC
+TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCACAGCCTAATTA
+TTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCC
+CAACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTAC
+CCCTCACAATCATGGCAAGCCAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAAC
+TCTACCTCTCTATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG
+AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTGGCTATCATCA
+CCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACATACTTCCTATTCTACACCC
+TAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACAACACCCTAGGCTCAC
+TAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACT
+TAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT
+TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCGCAG
+TACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCACACTCATTCTCAACCCCCTGA
+CAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCT
+CCATCTGCCTACGACAAACAGACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACA
+TAGCCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA
+TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCAAACTCAAACT
+ACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCAC
+TAATAGCTTTTTGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTA
+ACCTACTGGGAGAACTCTCTGTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCC
+TACTTACAGGACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA
+CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTCACACGAGAAA
+ACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCTATCCCTCAACCCCGACATCA
+TTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACA
+ACAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCC
+CCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTA
+GGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATAAC
+CACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACCACCCTCGTTAACCCTAACAA
+AAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCT
+CTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTG
+AGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAAT
+ATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATAT
+AAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATTTTCCTAATTACCAT
+ACTAATCTTAGTTACCGCTAACAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGG
+AATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGC
+CATTCAAGCAGTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG
+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCC
+AAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGG
+TCTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCA
+CTCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGA
+AAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC
+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCTCCACTTCAAG
+TCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCATTCCTGCA
+CATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCATCCA
+CAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCT
+CACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG
+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGC
+CCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCT
+TCTCACCCTAACAGGTCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCC
+CACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCAT
+TACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT
+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAAC
+CAACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATT
+CTACCCTAGCATCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCT
+GCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACA
+GCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT
+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCC
+GAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACCACTACTA
+ATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTG
+ACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCA
+CCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA
+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCG
+CTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATTAAAAAAACTATTAAAC
+CCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCA
+GTACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTA
+AACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA
+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATAC
+GCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCAT
+CCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAA
+TCACCACAGGACTATTCCTAGCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCAT
+CAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA
+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACG
+GATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAG
+CAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAG
+TAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCT
+GAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT
+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCACGAAACGGGAT
+CAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACA
+CAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTAT
+TCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCC
+CTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC
+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCC
+CCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTT
+ATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCT
+ACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCC
+TAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA
+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAGGACAAATCAG
+AGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTC
+TCTGTTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAAC
+AACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAA
+TACTTGACCACCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT
+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAACTCCAAAGCCA
+CCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAA
+GTCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCC
+CCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTG
+CTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC
+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAG
+ACATCACGATG
+
diff --git a/inst/sequences/input.dat b/inst/sequences/input.dat
new file mode 100755
index 0000000..740568c
--- /dev/null
+++ b/inst/sequences/input.dat
@@ -0,0 +1,2835 @@
+>YCG9 Probable 1377 residues Pha 0 Code 0
+ATGAATATGCTCATTGTCGGTAGAGTTGTTGCTAGTGTTGGGGGAAGCGGACTTCAAACG
+CTTTGCTTTGTTATTGGTTGTACGATGGTTGGTGAAAGGTCACGTCCATTGGTGATTTCC
+ATCCTAAGTTGTGCATTTGCTGTAGCTGCTATCGTTGGTCCTATAATCGGAGGTGCCTTT
+ACAACCCATGTTACCTGGAGGTGGTGCTTCTATATCAATCTTCCTATCGGTGGTCTTGCC
+ATTATTATGTTTTTACTCACATATAAGGCCGAGAATAAGGGTATACTTCAACAAATTAAA
+GATGCTATAGGAACAATCTCGAGCTTTACTTTTAGTAAGTTCAGACACCAAGTTAATTTT
+AAAAGACTTATGAATGGCATAATCTTCAAGTTTGACTTCTTTGGTTTTGCCCTCTGCTCT
+GCAGGGCTGGTCCTTTTCCTACTGGGGCTAACCTTTGGTGGTAATAAATATAGTTGGAAC
+TCTGGCCAAGTCATCGCATATTTGGTTTTGGGTGTCTTACTTTTTATTTTTTCATTGGTG
+TACGATTTCTTCTTATTCGATAAATTCAACCCGGAACCTGATAATATATCCTACAGGCCT
+CTCCTTCTAAGAAGATTGGTAGCAAAACCAGCCATAATAATAATAAACATGGTAACATTT
+CTATTATGTACCGGTTACAATGGGCAAATGATATACTCTGTCCAGTTTTTCCAACTTATA
+TTTGCGTCGAGTGCATGGAAAGCCGGTCTTCACTTGATACCAATCGTTATTACCAACGTT
+ATTGCGGCCATTGCAAGTGGTGTGATTACCAAAAAGCTCGGTTTAGTTAAACCACTCTTA
+ATATTTGGAGGCGTTCTTGGGGTAATTGGAGCAGGGCTTATGACACTTATGACAAATACG
+TCCACGAAGTCAACTCAAATTGGTGTTTTGCTATTACCGGGGTTTTCCCTTGGATTTGCT
+CTACAAGCATCGCTCATGAGTGCACAGCTTCAAATTACCAAAGATCGTCCAGAAGCTGCT
+ATGGACTTTATTGAAGTAACAGCTTTCAATACATTCATGAAGTCATTAGGTACAACTCTT
+GGTGGTGTGCTTTCAACCACTGTTTTTTCCGCCTCCTTTCACAACAAAGTATCACGAGCT
+CATCTAGAGCCTTACGAAGGAAAAACGGTTGATGACATGATTTTGTATCGTCTTCAAAAC
+TACGACGGTTCTCATTCGACTATTGGAAACATTTTAAGCGACTCCATTAAGAACGTATTT
+TGGATGGATCTAGGGTTTTATGCCTTAGGATTTTTGTTTTGTAGTTTTTCATCCAATAAG
+AAATTAATCATACCAAAAAAGGACGAGACACCAGAAGATAATTTAGAAGACAAGTAG
+>YCG8 573 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTACTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+CTACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAGTTTGA
+>ALPHA2 633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>ALPHA1 528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>CHA1 1083 residues Pha 0 Code 0
+ATGTCGATAGTCTACAATAAAACACCATTATTACGTCAATTCTTCCCCGGAAAGGCTTCT
+GCACAATTTTTCTTGAAATATGAATGCCTTCAACCAAGTGGCTCCTTCAAAAGTAGAGGA
+ATCGGTAATCTCATCATGAAAAGTGCCATTCGAATTCAAAAGGACGGTAAAAGATCTCCT
+CAGGTTTTCGCTAGTTCTGGCGGTAATGCCGGTTTTGCTGCTGCAACAGCATGTCAAAGA
+CTGTCTCTACCATGTACAGTCGTGGTTCCTACAGCGACAAAGAAGAGAATGGTAGATAAA
+ATCAGGAACACCGGTGCCCAGGTTATCGTGAGTGGTGCCTACTGGAAAGAAGCAGATACT
+TTTTTAAAAACAAATGTCATGAATAAAATAGACTCTCAGGTCATTGAGCCCATTTATGTT
+CATCCCTTCGATAATCCGGATATTTGGGAAGGACATTCATCTATGATAGATGAAATAGTA
+CAAGATTTGAAATCGCAACATATTTCCGTGAATAAGGTTAAAGGCATAGTATGCAGCGTT
+GGTGGAGGTGGTTTATACAATGGTATTATTCAAGGTTTGGAAAGGTATGGTTTAGCTGAT
+AGGATCCCTATTGTGGGGGTGGAAACGAATGGATGTCATGTTTTCAATACTTCTTTGAAA
+ATAGGCCAACCAGTTCAATTCAAGAAGATAACAAGTATTGCTACTTCTCTAGGAACGGCC
+GTGATCTCTAATCAAACTTTCGAATACGCTCGCAAATACAACACCAGATCCGTTGTAATA
+GAGGACAAAGATGTTATTGAACCCTGTCTTAAATATACACATCAATTCAATATGGTGATT
+GAACCGGCATGTGGCGCCGCATTGCATTTGGGTTACAACACTAAGATCCTAGAAAATGCA
+CTGGGCTCAAAATTAGCTGCGGATGACATTGTGATAATTATTGCTTGTGCGAGCTCCTCT
+AATACTATAAAGGACTTGGAAGAAGCGTTGGATAGCATGAGAAAAAAAGACACTCCTGTA
+ATAGAAGTCGCTGACAATTTCATATTTCCAGAAAAAAATATTGTGAATTTAAAAAGTGCT
+TGA
+>KRR1 951 residues Pha 0 Code 0
+ATGGTGTCTACACATAACAGAGATAAACCTTGGGATACGGATGATATTGATAAATGGAAG
+ATAGAGGAGTTTAAGGAAGAGGATAACGCATCCGGTCAACCTTTTGCTGAAGAGTCCAGT
+TTTATGACTTTGTTTCCTAAATACAGAGAAAGTTACTTGAAGACGATTTGGAATGATGTA
+ACAAGGGCTCTAGACAAACACAACATAGCGTGTGTTCTAGATTTAGTCGAAGGTTCTATG
+ACAGTAAAAACAACTAGAAAAACATACGATCCCGCTATCATTTTGAAAGCCAGAGATTTG
+ATCAAATTATTGGCGAGATCCGTTCCTTTCCCGCAAGCCGTTAAGATCCTACAAGATGAC
+ATGGCATGCGACGTTATTAAAATTGGTAATTTCGTTACTAACAAAGAAAGGTTTGTCAAG
+AGAAGACAACGTCTTGTAGGCCCTAACGGTAATACTTTAAAGGCTTTGGAACTTCTAACT
+AAATGTTACATTCTAGTACAAGGTAACACAGTAAGTGCCATGGGTCCCTTCAAGGGCTTG
+AAGGAGGTCCGTCGAGTAGTAGAAGATTGTATGAAAAATATTCACCCTATCTATCATATC
+AAGGAATTAATGATAAAAAGAGAATTGGCAAAAAGGCCAGAGTTAGCCAATGAAGATTGG
+TCAAGATTCTTGCCCATGTTTAAGAAGAGGAATGTGGCCAGAAAGAAACCCAAGAAGATC
+AGAAACGTCGAAAAGAAGGTCTATACTCCATTTCCTCCTGCCCAATTGCCTAGAAAGGTT
+GATTTGGAAATTGAAAGTGGTGAGTATTTCTTAAGCAAGAGAGAAAAGCAAATGAAGAAA
+TTAAATGAGCAAAAGGAAAAGCAAATGGAAAGAGAAATCGAAAGGCAGGAAGAGAGAGCA
+AAAGATTTCATAGCTCCGGAAGAAGAAGCATACAAGCCAAACCAAAATTAG
+>PRD1 2139 residues Pha 0 Code 0
+ATGCGATTGTTGCTGTGCAAGAATTGGTTTGCGTCACCTGTAATCTCACCACTACTGTAT
+ACCCGCTCCTTATATTCAATGGCTAACACTACTAGTTTCCCTATTGCTCCCCAGGCCCCG
+CCTAATTGGTCGTTCACTCCCAGCGATATTAGTGGGAAAACCAACGAAATCATCAACAAC
+AGCAACAATTTCTATGATTCTATGAGTAAGGTAGAGAGCCCTTCCGTGAGTAATTTTGTG
+GAGCCTTTCATGAAGTTTGAAAATGAATTGGGCCCAATAATTAACCAATTAACTTTCTTA
+CAGCATGTGTCGTCTGATAAAGAAATTAGGGACGCATCTGTGAACTCCTCAATGAAACTG
+GATGAGTTGAACATCGATCTATCTCTGCGTCACGACATCTTTTTGCAATTCGCCCGCGTC
+TGGCAGGATGTTCAATCGAAGGCAGATTCTGTGGAAAGAGAAACTTTCAAATACGTTGAG
+AAGTCTTACAAGGACTACATTCATTCTGGTTTGGAACTTGACGAGGGAAACCGATTGAAA
+ATCAAAGAGATCAAAAAGAAGATCTCCGTTAACTCTATTAATTTTTCGAAGAATCTGGGA
+GAACAAAAGGAATACATCACTTTCACCAAAGAACAATTGGAAGGTGTGCCGGATTCTATT
+TTGACGCAGTTCGAGACAATAAAATCTGACAAAGATAGCAATGAAACCTTGTATAAAGTC
+ACCTTCAAATATCCGGACATTTTTCCCGTGATGAAATTGGCATCCTCAGCTCAGACTAGA
+AAGCAGGCCTTTTTGGCCGACCAAAATAAGGTCCCTGAAAATGAAGCTATACTGTTGGAT
+ACATTGAAGCTGCGTGACGAATTGGCCTCGTTATTGGGCTATGACACGTATGCGAACTAC
+AACCTGTATGATAAAATGGCTGAAGATAGCACTACGGTAATGAACTTTTTGAATGATTTG
+AAGGACAAGCTAATTCCGCTGGGCAGAAAGGAACTACAGGTCTTGCAAGATATGAAAGCC
+GAAGATGTTAAGAAACTTAACCAGGGTGCAGATCCAAACTACTACATTTGGGACCACCGT
+TACTACGATAACAAATATTTGTTAGAAAACTTCAATGTGGACCTAGAAAAGATTTCTGAA
+TATTTTCCACTAGAGGCTACGATTACTGGTATGCTGGAAATATACGAAACATTGTTTAAT
+TTGAAGTTTATCGAGACGAAAGATTCTCAAAACAAATCTGTTTGGCATGACGACGTCAAA
+CAAATCGCCGTTTGGAATATGGATGATCCAAAGTCTCCAAACTTTGTTGGTTGGATTTAT
+TTCGATTTACATCCTCGTGATGGTAAATATGGCCACGCTGCCAATTTTGGTTTATCGTCA
+TCATTCATGATTGATGACACCACAAGATCGTATCCGGTTACTGCGTTGGTTTGCAATTTC
+TCCAAATCTACGAAGGATAAACCTTCTCTACTGAAGCATAACGAAATAGTGACCTTTTTC
+CATGAATTGGGCCATGGTATCCATGACCTGGTGGGACAAAACAAGGAATCGAGGTTTAAT
+GGCCCCGGATCTGTTCCATGGGATTTTGTGGAGGCACCTTCCCAAATGTTAGAATTTTGG
+ACTTGGAATAAGAATGAATTAATCAACCTCTCATCACATTACAAAACGGGCGAAAAAATT
+CCAGAATCTTTGATCAATTCATTGATCAAAACTAAACACGTAAATGGTGCTTTATTCACT
+CTAAGACAATTACATTTTGGGTTATTTGATATGAAAGTACATACTTGTAAAGACTTGCAA
+AACCTGTCAATTTGCGATACCTGGAACCAATTGAGACAGGATATTTCTTTGATTTCTAAT
+GGTGGTACGTTATCCAAGGGTTATGATTCATTTGGCCATATAATGTCAGACTCTTACTCT
+GCCGGTTATTACGGTTATCTATGGGCGGAAGTCTTTGCAACTGATATGTATCACACCAAA
+TTCGCTAAGGATCCGTTAAATGCCAAGAATGGGATACAATACCGTGATATTGTGTTGGCT
+CGTGGTGGCCTTTATGATATTAATGATAATCTGAAAGAATTTTTGGGTAGGGAACCTTCT
+AAGGATGCTTTCTTGAAGGAGCTGGGCTTACAGAACTAA
+>KAR4 1008 residues Pha 0 Code 0
+ATGGCATTCCAAGATCCAACTTACGACCAGAATAAAAGCAGACACATCAACAACAGTCAC
+TTGCAAGGGCCAAACCAGGAAACAATAGAAATGAAATCTAAACACGTATCATTCAAACCC
+TCTAGAGACTTCCATACAAACGATTACTCGAATAACTACATTCATGGGAAGTCGCTACCG
+CAACAGCATGTTACTAATATTGAGAATAGGGTTGATGGCTATCCAAAACTTCAGAAATTA
+TTTCAGGCGAAAGCTAAACAAATAAATCAATTTGCCACTACGCCATTTGGGTGTAAAATC
+GGAATAGATTCCATTGTTCCAACGTTGAATCACTGGATACAGAACGAAAATTTGACTTTC
+GACGTGGTGATGATTGGCTGCTTAACAGAAAATCAGTTTATTTACCCAATTTTAACCCAA
+TTGCCATTGGATAGATTGATCTCCAAACCAGGTTTCCTGTTCATCTGGGCCAATTCTCAA
+AAAATCAATGAACTTACTAAACTTTTGAATAATGAAATATGGGCTAAAAAGTTTAGAAGA
+AGTGAAGAATTGGTTTTTGTTCCTATTGACAAGAAATCACCGTTTTATCCAGGTTTAGAT
+CAGGACGATGAAACGTTGATGGAAAAAATGCAATGGCACTGTTGGATGTGTATCACAGGT
+ACAGTAAGGAGGTCTACAGATGGACATCTTATTCATTGTAACGTAGACACTGACTTGAGT
+ATCGAAACGAAGGACACCACTAATGGTGCTGTACCATCCCATTTGTATCGTATTGCAGAA
+AACTTCTCTACCGCGACTAGACGATTACATATTATTCCTGCAAGGACTGGTTACGAGACA
+CCCGTCAAAGTAAGACCTGGCTGGGTTATAGTGAGCCCAGATGTTATGTTGGATAACTTC
+TCACCCAAGAGATATAAAGAAGAGATAGCTAATTTAGGTTCGAATATCCCATTAAAAAAT
+GAGATTGAGCTGTTAAGACCAAGAAGTCCAGTACAAAAAGCACAATAA
+>PBN1 1251 residues Pha 0 Code 0
+ATGGTGACAAGACATAGAGTGACTGTACTCTACAATGCCCCTGAGGATATCGGTAATCAT
+ATGCGCCAAAATGACACTCATTTGACTGTTCGTGGAGGTTCTGGTGTGGTTTTACAACAA
+AGGTGGCTATTAGAGAGGACTGGAAGCTTGGATAAATCCTTTACGAGAATCACTTGGAGG
+CCCAGAGCGGACTTGGCTAGAAGTTTAAGCGTTATAGAAAATGAACTGAGTGCTGGCTTT
+TCAGTTTACTCAAATTCTTCGGATGTGCCGGAAAGGTTTATTACTAACCCAGTCTACAAT
+TCATTTCACAGTGAGAAGTTTGACATAGAGCAGTACTTGCCTCCCGAAGTAGATTTGAAT
+CTGTCATGGAATCCAGAAGATTTTACATATGATATATCAGTGGAGCCCACACAAATCCAA
+ATTGTTGAATATCGTCTGTTGAAACAGGGTGAAGAATTTACAATTGCAAGAGTGAAAGAT
+GAGAAACTCGAAGTAGGTGTATTCTTTGTGGATGCAAGTGATGAAAGTGATGTCGATATT
+GGTGGAATACGTTGTAATTGGAGGATGGACGATGGTAAAATGGAAAGATGTCAGAAAACA
+TCCTTATTGTATAAACAGGGCCATATCGCATACAATCACTCGACGACTACGACATCACTA
+TATCTGAATGAACCTATCGGTTTGCATCCAAAAATCATGATTGATCTCACAGATTTCGAA
+GAACGCCCTAAATGCATGTATCTAATGCACCTGCAATTGCCGTTAGAATTATTTATCGAT
+AAATTCCAATCCTCTCCCTTACTACTTTTTGGAGAAGACGACTTAGAATTACCAGAATAC
+TCTCTTCGAGATAAGGCATGGGGTTCTGAAAGTATCTTTGAATTGAAAGCCGGCACAATG
+AATGAAGTGACATTGCATACTAGATATATTGAGCCTTCTAATAATAAAGGGGATAAATTA
+GAAGTTTCATTTGATCCAGAAGTTATATTAGCCTGCGACACAGGTGACAATAAAGTTTCC
+CGTAATCCATTTTATAAAAAAGGTCTAGGATATGAATCTCTCTTTACAGACGATACTACA
+TTCCGCCATTTGAACTCGACAACTCTTCTAGTACCAATTCCAAGGCCTGACACAAAGGAT
+TATTCCAAGATCAAAAATGGTACGTTACTATGCTTACTCATCTCCATCATATACATTTTC
+TCCAAGGTATTTGGTAACAACAAGAAGAAAAGATCAGTAAAACGGGAATAA
+>LRE1 1761 residues Pha 0 Code 0
+ATGCCCAATACGCATACTCAACATGTGCAAATATCAGAGCCAAATCCTGTAAATACTTTG
+TCTACACCATCCAAAAGAGGTCACCGCCATCGCAGATCGCTAGCAATATCAGGAGATTTT
+GATTTTTTGAAACAGCCTGCAGCAATTGTGAATTTACCACCTCCACAGGCGGCTGAAAAT
+TGTCCTTCAACTGCCCCAACTGCTGTATCAAGTACATTATCGCCAATACGCTACAATAGA
+TTTCCTTGCAAAACCAATGAAGACGCTGGAACGTTAGATTTGCCTGAACCAAGATTTTAT
+CCGTTATCACCAAAGAACAATCTGCAAACACCAAGTCCACGATTTTTCATTAGTGAAGAG
+CCAAGTTTTTCATCGCCAGTTAAAGGCGTCCCAGATGCCATTATTAACCTTGACGATGCG
+TTGAAGACAAGGCCTAGGTCATTTAAATCACATAGAAGATCTGAATCCGCTCCTCCTGAT
+TTGGAGGTTATGGTAGATAAGGGCAATTGTGCAGCCGGTTCTAACTCTATGATTAAAGAA
+GAAGAGGACTCCTTAATTGAACCAGAATCGAAAAATGAATATTATGAGCAAAAGCTTCCA
+ACAGCACTATTATCCCCACTGCGGCCTTCCCTTTGTGTATCTGAACAGGCCATTGATGTA
+GATGATTCAGCTCTCAATGGGTCACCGACCCATCACAACCATGGGATGCAAAACGCCAAT
+GCACGGAATTCCAACACATTCAATTCGTTGAAGATCAAAGGCCAAAAGCAAAGATATTAT
+CATTATACGAAGCAGCTACCTTTGACCGTAGGCTGTGACTCGCAATCTCCAAAAGAACAA
+AGGTCGGCTGCTTCAATGACAATCAATCAGGCAATGACACCTTCTTCCCTGGCCTATACC
+CCTTCTAAACTAGCATCTACTCCCGCAACACCAGTATCCTTTTATGACAGCAATGCGGAC
+ATTAACTTAGAAAGTGATAATTTTCCACTAAAAGATAACCCTAGATATGCCAAGGATGGT
+TATCCTAAAAAGTGCGGCAATTCACAGCTTAATCGTGTGCTGGATAGCGATAAAAGACAG
+GATTTTAGTGGAGAATCGAGAAGAAGAAGATCGGGCAGTCCTATCTCCCACATGCAACAC
+CGCAACCTGATTGATAATATGAAAGGTAGACGAAACAGTAACACGATAAACTCAATCTTC
+AACTACAAGAGTCAACATTATGAAATGCCATATGATGATATGATGAAAAATGAAAACATT
+AATGCACAGTCCATGCCCTTTTCAGTCAACGGTGTCAACAATGAAAATAGTATCGGAGGG
+GTTATTACGAGAGCGGACGATGCACCCCTTCAACACTCTGTGGTCAAATCCTGTACGCCT
+GATGGCAAGGAAGAAATGAATAGGCTTAAAAGTAATGACAGTAATGAATATTCCAAGTCT
+GAAGGGCAGATCAGAACCAATTCGCAACTAAGTAAGGACATTCTCATGGGTGAACCAGGT
+GATATGGTTGATCTGTCCTCTTTTGTCAACACGCAGAGAAAAGCCTCAAATGAAACTGGT
+GACTTAGTCTTTAGTTTATCCCAGGATGATGACGCACTGAAAACGTTCCATGCGAGCAAT
+AGCGCAGCAACAAGCAATGAAAGCTGGTGTATTAGCGATGGTGCGTTAGGAAAGCAGGCG
+CAGGACAGTGAAGTTAGGAGGAAAGAAATCAAATTAGGACTCTTTAGACATATTTTCAAG
+GAAGTAATACAACAATATTAA
+>APA1 966 residues Pha 0 Code 0
+ATGAGTATCCCCGCTGACATTGCATCTTTAATTAGTGACAAGTACAAAAGTGCCTTCGAT
+AATGGTAACTTAAAATTTATCCAGACTGAAACAACGAAAACAAAGGACCCAAAAACCAGC
+ATGCCATACTTGATTAGCCACATGCCAAGTCTGATCGAAAAGCCAGAGCGTGGCCAAACT
+CCAGAAGGAGAGGATCCACTAGGCAAACCTGAGGAAGAATTAACGGTTATCCCAGAATTT
+GGTGGTGCCGATAACAAAGCGTATAAATTGCTATTAAACAAATTCCCTGTAATCCCTGGA
+CACACTTTATTGGTAACTAACGAATACCAACATCAAACTGATGCCTTGACCCCAACCGAT
+TTATTGACTGCTTATAAGTTGCTGTGTGCCTTGGACAATGAAGAATCCGACAAGAGACAC
+ATGGTCTTTTACAATTCTGGTCCAGCCAGTGGTTCTTCATTGGACCACAAACATTTGCAA
+ATTTTGCAAATGCCTGAAAAGTTCGTCACTTTCCAAGATAGACTATGTAATGGTAAAGAA
+CATTTCCTACCAACTTTCAATACTGAACCTTTGCAAGATGCTAAAGTCTCGTTCGCTCAT
+TTTGTCTTGCCAATGCCGGAGTCCGAAGAAACTGTTGATGAAGACCTATTAGCTATGTGT
+TACATCTCCATATTGCAAAGAGCTTTGACCTTTTTCCAGGACTGGTTGAACGAAAATCCA
+GAACTAAAGAAATCCTACAATCTTATGTTAACCAAGGAATGGATCTGTGTCGTTCCACGT
+TCGAAGGCCTTTTCTGATGAAATGAAGATAGGTTTCAACTCCACAGGTTATTGTGGTATG
+ATCTTAACCAAAAATGATGAAGTTTTCTCCAAGATTACTGAAAAACCTGAATTGATTAAC
+GATATCTTATTGGAATGTGGTTTCCCAAACACTTCTGGTCAAAAACCAAACGAATACAAC
+TATTGA
+>YCE9 939 residues Pha 0 Code 0
+ATGTTTAGTAAATACCTCGTAACTGCATCTTCCCTCTTTGTGGCTTTGACCTCTGCAGCA
+TCTACCGTTGATCTAGATGCTCTGCTTCTTCTACCAGGGGTCGAGTCCCACGACGGCGTT
+GATACTGTATTTTCGACCAAAGACTTTTATCAAGTGTCATTCGTCAAATCCATTGCTCCT
+GCTATCGTAAACAGCTCCGTAATCTTCCACGATGTTTCTCGTGGTGTGGCTATGGGCAAT
+GTCAAGAGCAGAGCAAGTATCTTCAACCCAGAGGAAACGTATTACGATTGGGAACAGTAC
+CAAGTAGTAAATAACGGAGACTGGCGAACCGAATGGGCACCTGCCTCTGACTGCATTTGG
+AGGGAGGAGAAGGATAACAGCGACGAAACACCGGACAGATTCCCCATCTCGGTGCCATAT
+AATTGGACGTCACAGTACTCAATTGTAGATTATGACACAGACGCTAACGAAGACAATTTA
+GATTTCAGGTTTATTAAATCATTGCTAGATAAGAAAAATTGGTTGAATAAAATTAACCAG
+ACTGTTTCCCAATCCAGTATTATGGTAGCACCAATGATTAAGCCATACAATGTGGTCCAG
+CTTTGGTATTCAAAATATATGGTTTGGGCAAACGTTCAAAGACAATATTGTAGCGGTGTT
+TATCCAGGAGGGACTCAATGTAGCGCTTGGTCCAGGTACTACCATGTTGATGCACCTACC
+TGCGATGAGCCTGTCGCCTCTTACATGACCAAAATGTCGGAAAATGAGGTTCAGTGTCCC
+AATGAGAGAAACGCAACTACCCTAGAGCCTCTCCGCCTGAATAAGCAGGGAGACTCTGAT
+TTTTCTTTGACTTTCGAGGAAGAGGAAGAGGAAGAGACAGGATCTAAATCTCTTTGGAGT
+ACATTGAAAAAAATTTTCTCTAAAAGAAGTATAAGTTGA
+>YCE8 1392 residues Pha 0 Code 0
+ATGAACCGTATTACTAGGAAAAGTTGTTTATTCGCGATTATATTTGCATCATTATTTGTG
+ACACATGCATTGGGTGCCGCTATTGATCCGCCAAGGCGACCACATAATGTGAAGCCTTTT
+CATAACGGTAATCTCGAACTTCAAAGAAGAGCAAATGAACCGTTTTTTGAAATAGATGTC
+AAGAGTCTGAACACAAACTCACCGATATCAGAGTTGTGTAAAAAAGATTTGCACGTCATT
+GAATCGTCTCATGATCTTTTTCATTTACAAAACCAATGTGAATTCATCTTGGGGTCATTA
+AAAGTCACAAACTATGATTCTAACATTTTGGATTTGAACAGCTTGAGGGCCATTGGTGGT
+GACCTGATTATTCAGGATTCACCTGAACTGATCAGAATCCAAGCCGGGAACTTGAATAAA
+ATCGAAGGGCTCTTCCAATTACAGGGACTAACCTCTTTGGTTTCTGTTGAAATTCCAACT
+TTGAAATTTTGTCAGTCACTGGAGTGGAAAGTTGTTCCCATCTTGAACTACGTCTCCATG
+GATTCTCAGAATATTGAGATTATAAAGGATATTGTCATATCGGATACTTCATTAGCAAAC
+ATCGAGAATTTCAACAAGGTTCAGGAAATTGATACTTTCAATATCAATAATAACAGATTT
+TTAGAAACTATTCATTCGAACGTTAAAACCATTAGGGGACAATTCAGTGTACATGCGAAC
+GCTAAGGAGCTAGAACTTGAAATGCCACACTTGAGAGAAGTGGAAAACATAACGATTAGG
+GACACATCATTGGTCTACCTTCCACAATTAACAAAAGTGAAAAGCTCTTTAGAGTTCATC
+GAAAATTACTTTTACGAATTGAACCTGAACAATTTGCAGAAGATTGGTGGAACATTAGGA
+ATTATCAACAATGTAAATTTAATAAAAGTTAATTTGGAGAACTTAACAGACATTCAAGGT
+GGCTTGATGATCGCCGATAACGAATCCCTCGAGGATATTACTTTCCTGCCAAACTTGAAG
+CAGATTGGAGGTGCTATTTTCTTTGAAGGTTCGTTCAAAGATATCATGTTCGATAGCTTG
+AAACTGGTGAAAGGTAGCGCTTTTATTAAGAGTTCATCAAACGTGTTGGATTGCAATAAA
+TGGACAAACCCATCAAATGGAAGATCAATCATCAGGGGTGGGAAATTCACTTGTATTTCT
+GGTAAGAAGGAAAATACGCTGAATGTTAAACAGGATGGTACAATCATAGAAAAAGGGTAC
+AAAGATTTAACGCAAGAAGGTGAAGACTCCAAGAAAAGAGTGATTTCAAAATACGCGAAC
+TCAGCAAATCCAAGCATGCAATTGGACCCCCTTCTTTTTGGTACATGCCTTGTTGCTATG
+TTATTGTTTTAA
+>YCE7 777 residues Pha 0 Code 0
+ATGAAGAAGACGTTCGAGCAGTTTCGAAAAAGCAATTTACTATTTCAGGTTCTCAAAGGA
+CCCCAGCATCTAGAATGTCAGAAGTTATTTGTCCTTGATTCTTCATTCAATCCACCACAT
+CTGGCCCATTTTCAACTACTATCGCAGACTATTAAAAACTTCAAATTGAAGGACACCCGT
+TCGCATGTTTTATTACTGTTAGCGGTGAATAATGCAGATAAGTTGCCTAAGCCGGCATCT
+TTTCCAACTCGTCTGGAAATGATGTGCTTATTCGCTGACTACCTTCAGGAGAAGCTCCCC
+CAATCTGTAGTATCTGTCGGGTTGACTGTTTTCTCGAAATTCATCGACAAGGACAAAATA
+TTACATGAGCAATTTGTTAAAGGATGCAGTGCAGATATAGGCTACTTAGTTGGTTTTGAT
+ACAATTGCTAGGATCTTTGATGAAAAATATTATCATCCTTTAAAAATCAGTGATGTAATG
+GAGAGCTTCATGTCGGGATCTCAATTATATTGCTTGGCGAGAGGCGATTGCCATCTCAGT
+GCTGAATCGCAACTAAGATACGCCAGTGACATCCTTGAGGGAAAATTCGAACCGGTAATA
+CCAAGAGAATGGGGCGCTAGGATTCATGTTATGCAAAATGATTATCCAGCATTAAGAAAT
+GTTTCATCATCCGAGATTAGGAACAAACTGAAGAATGGGCAAGTGGAGAGTTTGAAAGAC
+GAGTTGCCATTGTGCATATACGATTATTTGATCAATAATAAGACAATATTTGATTGA
+>YCE5 2283 residues Pha 0 Code 0
+ATGAAGATAACGTGTACAGACTTGGTGTACGTCTTCATTTTACTCTTCCTAAACACGAGT
+TGTGTCCAAGCCGTTTTTTCAGATGATGCATTTATCACTGATTGGCAACTGGCTAACTTA
+GGTCCTTGGGAGAAAGTCATCCCTGATTCTCGAGACCGCAACAGGGTTCTCATCTTATCG
+AACCCTACCGAAACTTCCTGCTTAGTTTCTTCGTTTAACGTTTCTTCCGGACAGATTCTT
+TTCAGAAACGTTTTACCCTTTACCATTGATGAGATTCAACTGGATAGTAATGACCATAAC
+GCAATGGTTTGTGTGAACTCTTCAAGCAACCATTGGCAGAAATATGATTTACACGATTGG
+TTTTTACTAGAGGAAGGCGTAGATAATGCCCCTTCTACGACCATTTTACCTCAATCCTCA
+TATTTAAACGATCAAGTATCTATTAAGAACAATGAACTACATATTCTCGATGAGCAGTCA
+AAACTGGCAGAATGGAAATTGGAGTTACCTCAAGGGTTCAATAAAGTGGAATATTTTCAT
+CGTGAAGATCCCCTGGCGTTAGTGTTGAACGTTAATGATACCCAATATATGGGATTCTCT
+GCCAATGGCACAGAATTGATCCCCGTTTGGCAAAGAGATGAATGGTTGACTAACGTGGTA
+GACTATGCTGTATTGGACGTCTTCGATTCTAGGGATGTGGAGTTGAACAAAGATATGAAA
+GCGGAACTTGATTCAAATTCGCTTTGGAATGCTTACTGGCTTAGATTGACAACTAATTGG
+AATCGCCTTATCAACTTATTGAAAGAAAACCAATTCTCACCAGGACGTGTCTTCACTAAA
+CTCCTAGCTCTAGACGCTAAGGATACCACGGTATCAGATTTGAAGTTCGGATTCGCCAAA
+ATCTTAATTGTTTTGACGCATGATGGCTTTATCGGCGGCCTTGATATGGTCAATAAGGGC
+CAACTTATCTGGAAACTCGATTTAGAAATTGATCAGGGCGTCAAAATGTTCTGGACGGAT
+AAAAACCATGACGAACTTGTTGTTTTTTCGCATGATGGGCATTATTTGACAATTGAAGTT
+ACTAAAGATCAACCGATTATCAAATCAAGATCCCCCCTATCTGAAAGGAAAACTGTTGAT
+TCCGTTATTAGGCTGAATGAACATGATCACCAGTATCTGATTAAGTTTGAGGATAAGGAT
+CATTTACTGTTCAAATTGAATCCCGGCAAGAATACGGATGTACCAATAGTTGCCAACAAC
+CATTCTAGTTCCCACATATTCGTCACAGAGCATGACACGAATGGCATTTATGGCTACATA
+ATCGAAAACGATACGGTAAAACAAACTTGGAAAAAAGCCGTAAATTCGAAAGAGAAAATG
+GTGGCATATAGCAAGAGGGAAACAACAAACCTAAACACTCTTGGTATTACACTAGGTGAC
+AAATCGGTTCTTTATAAATATTTGTACCCCAACCTAGCGGCTTATCTGATCGCTAATGAA
+GAACATCATACAATCACTTTTAACTTAATTGATACCATTACAGGAGAAATCCTCATTACC
+CAAGAGCACAAGGATTCTCCGGATTTTAGGTTTCCAATGGATATTGTTTTCGGTGAATAT
+TGGGTCGTTTATTCCTATTTCAGTTCTGAACCTGTTCCAGAACAAAAGTTAGTAGTGGTG
+GAATTATATGAGTCACTAACCCCAGATGAGCGTTTGTCTAACTCAAGCGACAATTTTTCT
+TATGATCCATTGACTGGACACATTAACAAACCTCAATTTCAAACTAAACAATTCATTTTT
+CCCGAGATTATCAAAACAATGTCCATTTCCAAGACAACGGATGATATTACCACAAAGGCA
+ATCGTTATGGAATTAGAAAATGGACAAATCACCTACATACCAAAGCTTTTATTGAATGCA
+AGAGGTAAACCAGCAGAAGAAATGGCCAAGGATAAGAAAAAAGAGTTTATGGCTACCCCA
+TACACGCCAGTTATCCCAATTAATGATAATTTCATTATCACTCATTTCAGAAATCTATTG
+CCAGGATCCGATTCGCAGTTGATCTCCATCCCAACCAATCTGGAATCCACAAGCATTATA
+TGTGATCTAGGCCTTGATGTATTTTGTACAAGGATCACACCTTCGGGCCAATTTGATTTA
+ATGAGTCCTACTTTCGAAAAGGGTAAATTGCTTATTACTATATTCGTCTTGTTGGTGATC
+ACGTATTTTATCCGTCCTTCTGTTTCAAACAAGAAGTTGAAATCCCAATGGCTAATTAAA
+TAG
+>YCE6 324 residues Pha 0 Code 0
+ATGGTAAAGGGTAAAACGTTTCTGAAAAGAATCTGTCCGGAAGAAACGTTAAACGAAGAA
+ACTAAGCAGGAAGTTTCGGTAGGGTTCGATAAGATGAGAACCCTGTTGCGGTCTCGAGAA
+TCAGGGATGACTTTCTCCCAAGGACCTAAGTTAGCCAGTTGCCAATCAGTGATAAATGCA
+TCATCTGAAAAAACGGCTTGGACACAACTCGTGTTTAGGAAGAGTAAAATGAAGACGTAC
+ACCAAGTCTGTACACGTTATCTTCATTGCTATGGGGGAAGGGGAGGATGAAAGTGTTGAT
+ATGAATGTAGGTATTAGTTATTAA
+>YCE4 1254 residues Pha 0 Code 0
+ATGGCTGTATTTACTCCTCCATCAGGTAATAGCAATTCCACCGACCATACTCACACACAA
+GATGACCACGACAAAGATGATAATGATATCAAGAAATTCTACATAAGGCCAAGTTTAGGC
+TTAAAACTGTGGGGTCCGCTCGTACCCGCTCCTGATAACCTACCGGGACTATACACTCTA
+ATCACTATCCAATCTGCAGTGGGTTTCTTTGCCCTTTGGAGACTGAGAAGGCTCTACAAA
+CTACCGCCACCGCGCCGCATTGCCACTGGCACTCACTCGGATTTATCCTTTGGCGAACTA
+CCCAGTGAAATGATTGTCAATGGCAAGACTAAAATCAAAAAGGATATTGCTGACTTTCCA
+ACTTTGAACCGCTTCTCCACCACCCATGGTGACATTGTGCTCGCCCCTCCTCCCATCATA
+CCTCGCCAATCTCGATTCGTCAGCGTCAGAAAGCTCTTATGGGGGTTGTTTGGCTCTTTG
+CTACTTTCTCAGTCACTGTTGGAGCTTACTCGCCTGAACTTTCTTAAATACGACCCCTGG
+TGCGACGAAATGAAATCCGTACGTGACAAGAAGTTTTTCAACAATATTGTCAAATATTAT
+CACGAGGGCATAGACCCCACCAAAATAAAAGTCAAGGATGCTATGAACGGTACTCCTCTC
+TCGACAAATATCCCTGAGGTCAAACAAAGCGTCGCTCTCGCTAGAGCGCAAGTTGAGGCG
+CAGAATCCCATTATTAAATGGTTCGGACCCTTGGAATACAAGCCCATGTCTTTCAACGAG
+TACCTCAATCGCATGGAATTTCACTTGGACATGTTCGAGTTTTTTCAAAATAAAAGAAAC
+ATTAGAGAAAATTCCATTGAACTCATCAATTCCATATCCCACAATCCGCAGTCTTCTTCT
+ACTGGCCTTGAAGGTCTTTCCGAGTCCAAAAAACTCCATCTACAAAATGTGGAAAAAAGA
+CTGCATTTCTTAGCATCTTCGGGAGATTCCATTTCCGCACCAGTAAAGAAGAGATCCAGC
+ACCACACTCTCCCGAGGTGTCATTTTGCCCCATGACACGAAAGGCCCGCAAGATATTGAT
+CTCGATACAATAAGATCGCTTTATGATCCATGGATGACTTTGGCCTTAGAAACTTCGCTA
+AGCATCAAATTCATACCAACTACCATGCCCTCCCATACCAAGACACCCACTAGCACGGAC
+CAGCCGTTACCAGGGCCTACCCCCAAGGCTCTCACTAATGAAAAGACACATTAG
+>PDI1 1569 residues Pha 0 Code 0
+ATGAAGTTTTCTGCTGGTGCCGTCCTGTCATGGTCCTCCCTGCTGCTCGCCTCCTCTGTT
+TTCGCCCAACAAGAGGCTGTGGCCCCTGAAGACTCCGCTGTCGTTAAGTTGGCCACCGAC
+TCCTTCAATGAGTACATTCAGTCGCACGACTTGGTGCTTGCGGAGTTTTTTGCTCCATGG
+TGTGGCCACTGTAAGAACATGGCTCCTGAATACGTTAAAGCCGCCGAGACTTTAGTTGAG
+AAAAACATTACCTTGGCCCAGATCGACTGTACTGAAAACCAGGATCTGTGTATGGAACAC
+AACATTCCAGGGTTCCCAAGCTTGAAGATTTTCAAAAACAGCGATGTTAACAACTCGATC
+GATTACGAGGGACCTAGAACTGCCGAGGCCATTGTCCAATTCATGATCAAGCAAAGCCAA
+CCGGCTGTCGCCGTTGTTGCTGATCTACCAGCTTACCTTGCTAACGAGACTTTTGTCACT
+CCAGTTATCGTCCAATCCGGTAAGATTGACGCCGACTTCAACGCCACCTTTTACTCCATG
+GCCAACAAACACTTCAACGACTACGACTTTGTCTCCGCTGAAAACGCAGACGATGATTTC
+AAGCTTTCTATTTACTTGCCCTCCGCCATGGACGAGCCTGTAGTATACAACGGTAAGAAA
+GCCGATATCGCTGACGCTGATGTTTTTGAAAAATGGTTGCAAGTGGAAGCCTTGCCCTAC
+TTTGGTGAAATCGACGGTTCCGTTTTCGCCCAATACGTCGAAAGCGGTTTGCCTTTGGGT
+TACTTATTCTACAATGACGAGGAAGAATTGGAAGAATACAAGCCTCTCTTTACCGAGTTG
+GCCAAAAAGAACAGAGGTCTAATGAACTTTGTTAGCATCGATGCCAGAAAATTCGGCAGA
+CACGCCGGCAACTTGAACATGAAGGAACAATTCCCTCTATTTGCCATCCACGACATGACT
+GAAGACTTGAAGTACGGTTTGCCTCAACTCTCTGAAGAGGCGTTTGACGAATTGAGCGAC
+AAGATCGTGTTGGAGTCTAAGGCTATTGAATCTTTGGTTAAGGACTTCTTGAAAGGTGAT
+GCCTCCCCAATCGTGAAGTCCCAAGAGATCTTCGAGAACCAAGATTCCTCTGTCTTCCAA
+TTGGTCGGTAAGAACCATGACGAAATCGTCAACGACCCAAAGAAGGACGTTCTTGTTTTG
+TACTATGCCCCATGGTGTGGTCACTGTAAGAGATTGGCCCCAACTTACCAAGAACTAGCT
+GATACCTACGCCAACGCCACATCCGACGTTTTGATTGCTAAACTAGACCACACTGAAAAC
+GATGTCAGAGGCGTCGTAATTGAAGGTTACCCAACAATCGTCTTATACCCAGGTGGTAAG
+AAGTCCGAATCTGTTGTGTACCAAGGTTCAAGATCCTTGGACTCTTTATTCGACTTCATC
+AAGGAAAACGGTCACTTCGACGTCGACGGTAAGGCCTTGTACGAAGAAGCCCAGGAAAAA
+GCTGCTGAGGAAGCCGATGCTGACGCTGAATTGGCTGACGAAGAAGATGCCATTCACGAT
+GAATTGTAA
+>GLK1 1503 residues Pha 0 Code 0
+ATGTCATTCGACGACTTACACAAAGCCACTGAGAGAGCGGTCATCCAGGCCGTGGACCAG
+ATCTGCGACGATTTCGAGGTTACCCCCGAGAAGCTGGACGAATTAACTGCTTACTTCATC
+GAACAAATGGAAAAAGGTCTAGCTCCACCAAAGGAAGGCCACACATTGGCCTCGGACAAA
+GGTCTTCCTATGATTCCGGCGTTCGTCACCGGGTCACCCAACGGGACGGAGCGCGGTGTT
+TTACTAGCCGCCGACCTGGGTGGTACCAATTTCCGTATATGTTCTGTTAACTTGCATGGA
+GATCATACTTTCTCCATGGAGCAAATGAAGTCCAAGATTCCCGATGATTTGCTAGACGAT
+GAGAACGTCACATCTGACGACCTGTTTGGGTTTCTAGCACGTCGTACACTGGCCTTTATG
+AAGAAGTATCACCCGGACGAGTTGGCCAAGGGTAAAGACGCCAAGCCCATGAAACTGGGG
+TTCACTTTCTCATACCCTGTAGACCAGACCTCTCTAAACTCCGGGACATTGATCCGTTGG
+ACCAAGGGTTTCCGCATCGCGGACACCGTCGGAAAGGATGTCGTGCAATTGTACCAGGAG
+CAATTAAGCGCTCAGGGTATGCCTATGATCAAGGTTGTTGCATTAACCAACGACACCGTC
+GGAACGTACCTATCGCATTGCTACACGTCCGATAACACGGACTCAATGACGTCCGGAGAA
+ATCTCGGAGCCGGTCATCGGATGTATTTTCGGTACCGGTACCAATGGGTGCTATATGGAG
+GAGATCAACAAGATCACGAAGTTGCCACAGGAGTTGCGTGACAAGTTGATAAAGGAGGGT
+AAGACACACATGATCATCAATGTCGAATGGGGGTCCTTCGATAATGAGCTCAAGCACTTG
+CCTACTACTAAGTATGACGTCGTAATTGACCAGAAACTGTCAACGAACCCGGGATTTCAC
+TTGTTTGAAAAACGTGTCTCAGGGATGTTCTTGGGTGAGGTGTTGCGTAACATTTTAGTG
+GACTTGCACTCGCAAGGCTTGCTTTTGCAACAGTACAGGTCCAAGGAACAACTTCCTCGC
+CACTTGACTACACCTTTCCAGTTGTCATCCGAAGTGCTGTCGCATATTGAAATTGACGAC
+TCGACAGGTCTACGTGAAACAGAGTTGTCATTATTACAGAGTCTCAGACTGCCCACCACT
+CCAACAGAGCGTGTTCAAATTCAAAAATTGGTGCGCGCGATTTCTAGGAGATCTGCGTAT
+TTAGCCGCCGTGCCGCTTGCCGCGATATTGATCAAGACAAATGCTTTGAACAAGAGATAT
+CATGGTGAAGTCGAGATCGGTTGTGATGGTTCCGTTGTGGAATACTACCCCGGTTTCAGA
+TCTATGCTGAGACACGCCTTAGCCTTGTCACCCTTGGGTGCCGAGGGTGAGAGGAAGGTG
+CACTTGAAGATTGCCAAGGATGGTTCCGGAGTGGGTGCCGCCTTGTGTGCGCTTGTAGCA
+TGA
+>YCD8 1587 residues Pha 0 Code 0
+ATGAGCTATGGAACTATAAATGATATGAATGAATCGGTAACGAACTATCGAATAAAAAAA
+GCCCAAAACAATATCAAGGGATGGTACGCTTACTCATTTTCTAGCGAACCATTTGTCGTT
+TCTGCGGTTTCAACGTATATTCCCTTACTACTGCAGCAATTTGCGAGTATAAATGGTGTA
+AAAGTTCACGATCACTCCATACCCTGCCTGTCAGAAACGGGTAGTGATTCAGATAAGTGT
+GTTCTTGGTTTGTTCAACAATCGGATCTTCGTAGATACTTCAAGTTTTGCATTATATGTC
+TTTTCCCTTAGCGTTTTATTCCAAACTATAATAGTCATTTCCGTTTCAGGGATAGTAGAT
+CTCTGGGGGAGCGTTAAATTCAAAGGCAGAATTCTGGTTTGGTTTGGTATTGTGGGCGCA
+TTGTCGACTGTTGCGATTTCAAAATTGAATGATACCCAGATTTATTCTCTGGCTGGGCTT
+TATATAGTGGCCAATGGTTGTTTTGGCGTTATCAATGTTGTTGGGAATTCTCTTCTGCCC
+ATTTTTGTCAAGGATTCTTTGAAATGTCAAAGTCAAGGAGCTTATGAACCTGATAAGGTA
+GACTCGTTAACTACTGTTATTAGCGGTAGAGGTGCATCTTTAGGTTATTCAAGTGCCCTC
+ATTGTTCAGATTGTATCTATGTTCTTAGTCGCATCTAAAAAGGGCAGTAAGCAGGATGTT
+CAAGTGGCTGTTCTTTTCGTTGGGATTTGGTGGTTTGTGTGGCAACTGCCCATGATCTGG
+TTGATTGACGATGTGACAATACCGATAAGAGTTGACGATTCTACATTAGCATCCGCCCGC
+AGTCCGTATCCCGGTGAGCAAGACGCCTTGGGTCAACTAAACTGGAAGAATTACCTTTCA
+TATGGTTGGGTTTCGCTTTTCGAATCGTTTAAACATGCCAGACTATTGAAAGATGTGATG
+ATTTTTCTTATTGCGTGGTTTATTATTAGTGATTCCATTACAACTATAAATTCTACAGCG
+GTTTTGTTCTCCAAGGCAGAACTGCACATGAGTACCCTCAATTTAATCATGATAAGTGTT
+TTGACCGTTGTAAATGCAATGCTGGGTGCCTTTATGATTCCACAATTTCTTGCCACAAAG
+TTTCGGTGGACTTCTAGTCAAACTTTGATGTACATTATCATTTGGGCAAGTTTCATACCA
+TTTTATGGTATTCTTGGATTTTTCTTCAATGCGTTCGGTTTAAAGCATAAGTTTGAAATG
+TTCTTATTGGCCATTTGGTATGGATTATCACTAGGTGGCCTGTCCGCGGTTTCAAGATCA
+GTTTTCAGTTTGATTGTACCTCCAGGAAAAGAATCCACGTTTTTTAGTATGTTCAGTATC
+ACAGATAAGGGGTCGTCCATCCTGGGACCCTTCCTTGTTGGACTGCTTACCGATAAAACG
+CATAATATTCGCTATTCGTTTTATTTCTTCTTTTTGCTTTTGATGCTATCATTGCCTGTG
+CTAAACTGTTTGGATGTCAAGAGAGGTAGAAGAGAGGCTGAAGAACTCAGTCAAGTTTTA
+CCTGAAAGTGAAAGAAGGTTGGATTAG
+>SRO9 1401 residues Pha 0 Code 0
+ATGAAGATCTTTTGGGATCCTAGATCGGTAATAGAACATCAGGATTACTCTGGACCTGCT
+AACGTGTTTCATCTTCTTTTCACTTCTCTGCCCACGATGTCTGCTGAAACCGCCGCCGCA
+AACACTGCTACTGCCCCAGTCCCAGAAGTGCAAGAACAAGAGAGCTCCAAGAGCAAGCAA
+GTCAACTTGACGCCGGCACCATTGCCCACATCTTCCCCATGGAAACTTGCTCCTACTGAG
+ATCCCTGTTTCTACTATCTCAATAGAAGACTTGGATGCCACAAGAAAGAAGAAGAACAGA
+ACACCCACTCCGAAATCATCGACTGCTACCAAGTGGGTTCCCATCAAGGCCTCCATTACC
+GTCTCTGGCACCAAAAGATCCGGTTCCAAGAATGGTGCAAGTAATGGCAACAGCAACAAG
+AGCAAAAACAACAAAACTGCAGCATCGTCGACATCGTCGAGTAATGCTAACAGGAAAAAG
+AAGCATCACCAACATAATGCTAAGAAGCAACAACAAATGAAGAAAGATGGCTTTGAATCG
+GCAGTAGGTGAGGAAGATTCAAAAGACGCTACCTCTCAAGAAAATGGTCAATCTACACAA
+CAGCAACAACCACCTCACCACCGTAATCATCACCACAGTCATCACCATAACAGCAATGGT
+CCTCAAAGGAGAAAGTTCCACAACAGTAATAACGCCGGTATGCCTCAGAACCAAGGCTTC
+CCACCACAGTTTAAACCTTACCAAGGACGCAACGCTCGTAATAACAACAACAACCGCTCT
+AAATACCACAACCACTTCCATCACAACCAACAACATCCTCAACAACCTATGGTCAAATTA
+CAGCAACAGTTTTATCCAGTCCAACCAGTGTTAATGGCCATCAACAACATTGCTAGACAA
+ATTGAATACTATTTCAGCGAAGAAAACTTGACCGTCGACAATTACTTAAGGTCCAAACTC
+TCCAAGGATGGTTTTGCTCCATTGTCTTTAATCTCTAAGTTTTACAGAGTTGTTAACATG
+TCCTTCGGAGGTGACACTAACCTGATTTTAGCCGCATTGAGAGAAATTGTCGCTAACGAA
+GCCGCTACCGTCAATGTTGCAGAAGGTACTTTGGCCGCCAAGGAAGGTGATAACGTTACC
+GGTGAAGCCAAAGAACCATCTCCATTGGATAAGTACTTCGTTCGTTCCAAGAGCTGGTCA
+AACTGGTTACCAGAAACTTTTGAAACTGAAATTAATATTGAAAAAGAACTGGTCGGCGAT
+GCATTGGACCAATTCATGATATCCCTACCACCTGTTCCTCAACAAGAAGAGGAATCATCC
+ACTGAACTCGCTTCTCAAGAACAAGAAACCAAAGAAGACTCTGCGCCGGTTGCTGCCGGT
+GAATCCGAGTCTTCCTTATAA
+>YCD6 1701 residues Pha 0 Code 0
+ATGCAGGTTCAAAAAATGGTGAGAGATAACAGTAATAACGGTAGCGATAAAAGCGTCCAT
+TGGGAGAGGAGGAATAATAACGGCGCAGGCCCCCGTTATCGTTCCAGAAGCGGTAATACC
+GGTGCTTTGGCAACAAAACTAAGTAATGGGACGCTCTCTGTCAGAGGATTAGTGAAGGAC
+CGAACAGGAAGCGGCAAGATCGCGGGCTGTGTGGAGGCGTTTCTGGATGCCAGGACCCAA
+TTGAATACGCCCTGGGACCGTGCTAAGTGCAATTGGCTGGACCAGATAGATTACTATGTA
+CAGTTGAGAAAGACCGCGTTTTCTAAGGAATTGGACCAACTAAGGAAGCCCATGATCGAT
+GCATATGTGGCGGAGATGAGGCAGAAGTTTGATGCCTCCTATGGACAATCCAGGGCGCAA
+TTGGAAGCCAAACTGGCGCAGGTGGACAGTGAATGGCATATGGTACATGGTGATGTGCAT
+GCAAAACTGGAAAAACTCGTGGAAGAACGCCGGTTTTTGAAAAGATTAAGCGACACGATC
+GTACCACCCAGGTCCAAAAGATCACAGCGGCTGTCTCCATTGACCAAAGAGGACCGAGCC
+AACTGTATCTGTCCGCAGCCCAAAGGAATGAGCGACACCGCTTGGTTCGAAGCCATTCAG
+AAGAAAATGTTAGGAATGAATGGTACCATCAAGCTCCTAGAGACAGAACAGAAACTACTG
+GCTGACGAGAAAAACAGCGTGAGGAAGACGTTCTGGCCCATGGTGGAAGCACATTCACGC
+TCGAATGAATTTGCTTATCTGGAGAAATGCATCAGGCTGATGGCCTCTCAGAGAGCAATA
+TGCTTTTGTCTTGATATAGAGGCTTTCGAAACAAACCAGAACGTAATCACCGAAATTGGG
+ATTTCAATTTATGACCCCAGGGAAAATATGGTGCCGTCAATGGTTCCAATTACAAAGAAT
+TACCACCTAATTATCGAGGAGTCCCTGGAACTTAGAAACCAAAAATGGGTCTGTGACTAC
+AAGGATTGCTACTTATTGGGAGAAAGCTATGTTTTGAGCTTGAAAGAGTGCGTGCATTTC
+ATTCAATCACTAATAAACTATTACTTGGTCCCGGTGACCGAAGAAGACAAGACATGGTCA
+AGGGCATTTGTTGGTCATCACGTGAGCGGGGATCTTAAGTGGCTGGAGACTATTGGTGTC
+AAATTCCCTGGCAGAGGGTATGAAGGCCATCTGGACCATACGCTGCTTTTGGCTGAAACT
+CCCGGTGATCTAGACGTGTTCATCTTGGACACTGAGCAGTTTTACAGGAAATCGTATGGC
+GAAAAGGGCAGCAGTCTGGGCAAGATTCTGCGGTTGTTCGAGATACCGCATGCGTTTCTA
+CACAATGCCGGTAACGATGCCTACTATACCCTGCATTTGTTCATGAAGTTTTGCGATGTT
+AATTTCAGGAAAATAAGCGGCATGGACGATGTTCTTAAAGTAATGGGCCAAGTAAAAGTT
+TGGGGAGAACGAGACGTACGAGAGCCTAAAGTGGTGCCCATGTCGTATGCCATCTCCATC
+GAGGAGGCAGTCAAAAATCGGACGTACCGCAAGGGCGTCAAGAGCAGTAGGAAGGAAAGA
+GTCTGCCAAACGGAATTCGGTGGGTTAACGTATTTCGGAACTGCTAAAGACGCCTTCACA
+AGCACTCTTCCGACACACTAA
+>YCD5 333 residues Pha 0 Code 0
+ATGGTATCTCAAGAAACTATCAAGCACGTCAAGGACCTTATTGCAGAAAACGAGATCTTC
+GTCGCATCCAAAACGTACTGTCCATACTGCCATGCAGCCCTAAACACGCTTTTTGAAAAG
+TTAAAGGTTCCCAGGTCCAAAGTTCTGGTTTTGCAATTGAATGACATGAAGGAAGGCGCA
+GACATTCAGGCTGCGTTATATGAGATTAATGGCCAAAGAACCGTGCCAAACATCTATATT
+AATGGTAAACATATTGGAGGCAACGACGACTTGCAGGAATTGAGGGAGACTGGTGAATTG
+GAGGAATTGTTAGAACCTATTCTTGCAAATTAA
+>YCD3 507 residues Pha 0 Code 0
+ATGAATAAGTGGAGCAGGCTGTACGTTATAACTGTACGCAGGACTTTTCCAGGGAGAAGA
+AACATTGTACTGACGCAGTACTGGAATAAGAGCAAGAAAATGAGTGACGAATCGAATGAC
+GTGAAGTGGAACGATGCCCTGACACCATTGCAGCTGATGGTGCTGAGAGATAAGGCCACT
+GAAAGGCCCAACACCGGTGCGTATTTACACACCAACGAGTCCGGTGTCTACCATTGTGCC
+AACTGCGACAGACCGTTGTATTCGAGCAAGGCCAAGTTCGACGCTCGTTGTGGATGGCCC
+GCATTCTACGAAGAGGTATCCCCTGGAGCCATCACATATCATCGTGACAATTCTTTAATG
+CCTGCGAGGGTGGAGATATGTTGTGCAAGGTGTGGTGGACACTTGGGACATGTGTTTGAA
+GGTGAAGGCTGGAAACAGTTGCTAAACTTGCCCAAGGACACCAGACACTGTGTGAACAGT
+GCGTCTTTAAACCTCAAGAAGGATTAA
+>STE50 1041 residues Pha 0 Code 0
+ATGGAGGACGGTAAACAGGCCATCAATGAGGGATCAAACGATGCTTCGCCGGATCTGGAC
+GTGAATGGCACAATATTGATGAATAATGAAGACTTTTCCCAGTGGTCGGTTGATGATGTG
+ATAACTTGGTGTATATCCACGCTGGAGGTGGAAGAAACCGATCCATTATGTCAGAGACTG
+CGAGAAAATGATATTGTAGGAGATCTTTTGCCGGAATTGTGCTTGCAAGATTGCCAGGAC
+TTGTGTGACGGTGATTTGAATAAGGCCATAAAATTCAAGATACTGATCAATAAGATGAGA
+GACAGCAAGTTGGAGTGGAAGGACGACAAGACTCAAGAGGACATGATAACGGTACTGAAA
+AACTTGTACACTACTACATCTGCGAAATTGCAAGAATTTCAATCGCAGTACACAAGGCTG
+AGGATGGATGTCTTGGACGTAATGAAGACCAGCTCAAGCTCTTCTCCGATTAACACACAT
+GGAGTGTCCACTACGGTACCTTCTTCAAACAACACAATTATACCCAGTAGTGACGGTGTG
+TCTCTTTCACAAACAGACTATTTCGACACAGTTCATAACCGACAATCACCGTCAAGGAGA
+GAATCCCCGGTAACGGTATTTAGGCAACCCAGTCTTTCCCACTCAAAATCTTTGCACAAG
+GATAGCAAAAACAAAGTACCCCAAATATCTACAAACCAATCTCACCCATCTGCCGTTTCA
+ACAGCGAACACACCGGGGCCATCACCTAACGAGGCGTTAAAACAGTTGCGTGCATCTAAA
+GAAGACTCCTGCGAACGGATCTTGAAAAACGCAATGAAAAGACATAACTTAGCAGATCAG
+GATTGGAGACAATATGTCTTGGTCATTTGCTATGGGGATCAAGAGAGGCTGTTAGAATTG
+AACGAAAAGCCTGTGATCATATTCAAGAACTTAAAGCAACAGGGTTTGCACCCCGCCATT
+ATGTTAAGAAGAAGAGGTGATTTCGAAGAAGTAGCAATGATGAACGGAAGTGACAATGTC
+ACCCCCGGTGGAAGACTCTAA
+>HIS4 2400 residues Pha 0 Code 0
+ATGGTTTTGCCGATTCTACCGTTAATTGATGATCTGGCCTCATGGAATAGTAAGAAGGAA
+TACGTTTCACTTGTTGGTCAGGTACTTTTGGATGGCTCGAGCCTGAGTAATGAAGAGATT
+CTCCAGTTCTCCAAAGAGGAAGAAGTTCCATTGGTGGCTTTGTCCTTGCCAAGTGGTAAA
+TTCAGCGATGATGAAATCATTGCCTTCTTGAACAACGGAGTTTCTTCTCTGTTCATTGCT
+AGCCAAGATGCTAAAACAGCCGAACACTTGGTTGAACAATTGAATGTACCAAAGGAGCGT
+GTTGTTGTGGAAGAGAACGGTGTTTTCTCCAATCAATTCATGGTAAAACAAAAATTCTCG
+CAAGATAAAATTGTGTCCATAAAGAAATTAAGCAAGGATATGTTGACCAAAGAAGTGCTT
+GGTGAAGTACGTACAGACCGTCCTGACGGTTTATATACCACCCTAGTTGTCGACCAATAT
+GAGCGTTGTCTAGGGTTGGTGTATTCTTCGAAGAAATCTATAGCAAAGGCCATCGATTTG
+GGTCGTGGCGTTTATTATTCTCGTTCTAGGAATGAAATCTGGATCAAGGGTGAAACTTCT
+GGCAATGGCCAAAAGCTTTTACAAATCTCTACTGACTGTGATTCGGATGCCTTAAAGTTT
+ATCGTTGAACAAGAAAACGTTGGATTTTGCCACTTGGAGACCATGTCTTGCTTTGGTGAA
+TTCAAGCATGGTTTGGTGGGGCTAGAATCTTTACTAAAACAAAGGCTACAGGACGCTCCA
+GAGGAATCTTATACTAGAAGACTATTCAACGACTCTGCATTGTTAGATGCCAAGATCAAG
+GAAGAAGCTGAAGAACTGACTGAGGCAAAGGGTAAGAAGGAGCTTTCTTGGGAGGCTGCC
+GATTTGTTCTACTTTGCACTGGCCAAATTAGTGGCCAACGATGTTTCATTGAAGGACGTC
+GAGAATAATCTGAATATGAAGCATCTGAAGGTTACAAGACGGAAAGGTGATGCTAAGCCA
+AAGTTTGTTGGACAACCAAAGGCTGAAGAAGAAAAACTGACCGGTCCAATTCACTTGGAC
+GTGGTGAAGGCTTCCGACAAAGTTGGTGTGCAGAAGGCTTTGAGGAGACCAATCCAAAAG
+ACTTCTGAAATTATGCATTTAGTCAATCCGATCATCGAAAATGTTAGAGACAAAGGTAAC
+TCTGCCCTTTTGGAGTACACAGAAAAGTTTGATGGTGTAAAATTATCCAATCCTGTTCTT
+AATGCTCCATTCCCAGAAGAATACTTTGAAGGTTTAACCGAGGAAATGAAGGAAGCTTTG
+GACCTTTCAATTGAAAACGTCCGCAAATTCCATGCTGCTCAATTGCCAACAGAGACTCTT
+GAAGTTGAAACCCAACCTGGTGTCTTGTGTTCCAGATTCCCTCGTCCTATTGAAAAAGTT
+GGTTTGTATATCCCTGGTGGCACTGCCATTTTACCAAGTACTGCATTAATGCTTGGTGTT
+CCAGCACAAGTTGCCCAATGTAAGGAGATTGTGTTTGCATCTCCACCAAGAAAATCTGAT
+GGTAAAGTTTCACCCGAAGTTGTTTATGTCGCAGAAAAAGTTGGCGCTTCCAAGATTGTT
+CTAGCTGGTGGTGCCCAAGCCGTTGCTGCTATGGCTTACGGGACAGAAACTATTCCTAAA
+GTGGATAAGATCTTGGGTCCAGGTAATCAATTTGTGACTGCCGCCAAAATGTATGTTCAA
+AATGACACTCAAGCTCTATGTTCCATTGATATGCCAGCTGGCCCAAGTGAAGTTTTGGTT
+ATTGCCGATGAAGATGCCGATGTGGATTTTGTTGCAAGTGATTTGCTATCGCAAGCTGAA
+CACGGTATTGACTCCCAAGTTATCCTTGTTGGTGTTAACTTGAGCGAAAAGAAAATTCAA
+GAGATTCAAGATGCTGTCCACAATCAAGCTTTACAACTGCCACGTGTGGATATTGTTCGT
+AAATGTATTGCTCACAGTACGATCGTTCTTTGTGACGGTTACGAAGAAGCCCTTGAAATG
+TCCAACCAATATGCACCAGAACATTTGATTCTACAAATCGCCAATGCTAACGATTATGTT
+AAATTGGTTGACAATGCAGGGTCCGTATTTGTGGGTGCTTACACTCCAGAATCGTGCGGT
+GACTATTCAAGTGGTACTAACCATACATTACCAACCTATGGTTACGCTAGGCAGTACAGT
+GGTGCCAACACTGCAACCTTCCAAAAGTTTATCACTGCCCAAAACATTACCCCTGAAGGT
+TTAGAAAACATCGGTAGAGCTGTTATGTGCGTTGCCAAGAAGGAGGGTCTAGACGGTCAC
+AGAAACGCTGTGAAAATCAGAATGAGTAAGCTTGGGTTGATCCCAAAGGATTTCCAGTAG
+>BIK1 1323 residues Pha 0 Code 0
+ATGGATAGATATCAAAGAAAGATAGGATGTTTCATACAAATCCCAAATTTGGGGCGCGGA
+CAACTGAAATACGTGGGTCCAGTGGACACGAAAGCTGGAATGTTTGCTGGTGTAGACTTA
+CTTGCCAACATTGGTAAGAACGATGGATCATTCATGGGGAAGAAGTATTTTCAAACAGAG
+TATCCTCAAAGTGGACTATTTATCCAGTTGCAAAAAGTCGCATCATTGATCGAGAAGGCA
+TCGATATCGCAAACCTCGAGAAGAACGACGATGGAACCGCTATCAATACCCAAAAACAGA
+TCTATTGTGAGGCTCACTAACCAGTTCTCTCCCATGGATGATCCTAAATCCCCCACACCC
+ATGAGAAGTTTCCGGATCACCAGTCGGCACAGCGGTAATCAACAGTCGATGGACCAGGAG
+GCATCGGATCACCATCAACAGCAAGAATTTGGTTACGATAACAGAGAAGACAGAATGGAG
+GTCGACTCTATCCTGTCATCAGACAGAAAGGCTAATCACAACACCACCAGCGATTGGAAA
+CCGGACAATGGCCACATGAATGACCTCAATAGCAGCGAAGTTACAATTGAATTACGAGAA
+GCCCAATTGACCATCGAAAAGCTACAAAGGAAACAACTACACTACAAAAGGCTACTCGAT
+GACCAAAGAATGGTCCTCGAAGAAGTGCAACCGACTTTTGATAGGTATGAAGCCACAATA
+CAAGAAAGAGAGAAAGAGATAGACCATCTCAAGCAACAATTGGAGCTCGAACGCAGACAG
+CAAGCCAAACAAAAGCAGTTTTTTGACGCTGAGAATGAACAGCTACTTGCTGTCGTAAGC
+CAACTACACGAAGAGATCAAAGAAAACGAAGAGAGAAATCTTTCTCATAATCAACCCACT
+GGTGCCAACGAAGATGTCGAACTCCTGAAAAAACAGCTGGAACAATTACGCAACATAGAA
+GACCAATTTGAGTTACACAAGACAAAGTGGGCTAAAGAACGCGAACAATTGAAAATGCAT
+AACGATTCGCTCAGTAAAGAATACCAAAATTTGAGCAAGGAACTATTTTTGACAAAACCA
+CAAGATTCCTCATCGGAAGAGGTGGCATCCTTAACGAAAAAACTTGAAGAGGCTAATGAA
+AAAATCAAACAGTTGGAACAGGCTCAAGCACAAACAGCCGTGGAATCGTTGCCAATTTTC
+GACCCCCCTGCACCAGTCGATACCACGGCAGGAAGACAACAGTGGTGTGAGCATTGCGAT
+ACGATGGGTCATAATACAGCAGAATGCCCCCATCACAATCCTGACAACCAGCAGTTCTTC
+TAG
+>FUS1 1539 residues Pha 0 Code 0
+ATGGTAGCAACAATAATGCAGACGACAACAACTGTGCTGACGACAGTCGCCGCAATGTCT
+ACTACCTTAGCATCAAATTACATATCTTCGCAAGCTAGTTCCTCGACGAGTGTAACAACA
+GTAACGACAATAGCGACATCAATACGCTCTACACCGTCTAATCTACTCTTTTCTAATGTG
+GCGGCTCAGCCAAAATCATCTTCAGCAAGCACAATTGGGCTTTCAATCGGACTTCCCATC
+GGAATATTCTGTTTCGGATTACTTATCCTTTTGTGTTATTTCTACCTTAAAAGGAATTCG
+GTGTCCATTTCAAATCCACCCATGTCAGCTACGATTCCAAGGGAAGAGGAATATTGTCGC
+CGCACTAATTGGTTCTCACGGTTATTTCGGCAGAGTAAGTGTGAGGATCAGAATTCATAT
+TCTAATCGTGATATTGAGAAGTATAACGACACCCAGTGGACCTCGGGTGATAACATGTCT
+TCAAAAATACAGTACAAAATTTCCAAACCCATAATACCGCAGCATATACTGACACCTAAG
+AAAACGGTGAAGAACCCATATGCTTGGTCTGGTAAAAACATTTCGTTAGACCCCAAAGTG
+AACGAAATGGAGGAAGAGAAAGTTGTGGATGCATTCCTGTATACTAAACCACCGAATATT
+GTCCATATTGAATCCAGCATGCCCTCGTATAATGATTTACCTTCTCAAAAAACGGTGTCC
+TCAAAGAAAACTGCGTTAAAAACGAGTGAGAAATGGAGTTACGAATCTCCACTATCTCGA
+TGGTTCTTGAGGGGTTCTACATACTTTAAGGATTATGGCTTATCAAAGACCTCTTTAAAG
+ACCCCAACTGGGGCTCCACAACTGAAGCAAATGAAAATGCTCTCCCGGATAAGTAAGGGT
+TACTTCAATGAGTCAGATATAATGCCTGACGAACGATCGCCCATCTTGGAGTATAATAAC
+ACGCCTCTGGATGCAAATGACAGCGTGAATAACTTGGGTAATACCACGCCAGATTCACAA
+ATCACATCTTATCGCAACAATAACATCGATCTAATCACGGCAAGACCCCATTCAGTGATA
+TACGGTACTACTGCACAACAAACTTTGGAAACCAACTTCAATGATCATCATGACTGCAAT
+AAAAGCACTGAGAAACACGAGTTGATAATACCCACCCCATCAAAACCACTAAAGAAAAGG
+AAAAAAAGAAGACAAAGTAAAATGTATCAGCATTTACAACATTTGTCACGTTCTAAACCA
+TTGCCGCTTACTCCAAACTCCAAATATAATGGAGAGGCTAGCGTCCAATTAGGGAAGACA
+TATACAGTTATTCAGGATTACGAGCCTAGATTGACAGACGAAATAAGAATCTCGCTGGGT
+GAAAAAGTTAAAATTCTGGCCACTCATACCGATGGATGGTGTCTGGTAGAGAAGTGTAAT
+ACACGAAAGGGTACTATTCACGTCAGTGTTGACGATAAAAGATACCTCAATGAAGATAGA
+GGCATTGTGCCTGGTGACTGTCTCCAAGAATACGACTGA
+>YC08 579 residues Pha 0 Code 0
+ATGTCCCCAACTGGAAACTACTTAAACGCTATTACAAACCGTCGTACCATCTACAATTTG
+AAGCCCGAATTACCACAAGGTGTCGGTTTGGATGATGTAAAGAGAACTGTACACGTTATT
+CTCAAGAATACGCCAACAGCTTTTAACTCACAAGTGAATCGCGCTGTCATTATCGTTGGT
+GATACACACAAAAGGATATGGGATGCTGTTGCGAGCGCAATGCCAACTGCTGAAGCCAAG
+AAGAGACCAGAGTCTTGCAGAGATGAGGCTTACGGTTCAGTCATTTTCTTCACTGATGAA
+GGACCAACTGAAAACTGCAAGAGATTTTCCAGCCTTGGCACCGCTTTCCCAACATGCGCC
+GCTCATACGACCGGTGCTGTGCAAATTCAGTCTTGGACTGCCCTCGAACTATTGGGATTG
+GGGGCTAATTTGCAACACTATAATGACTACGTCAAATCTGCTTTGCCTCAAGATGTTCCT
+ATTGCGTGGACTGTACAATCTCAATTGGTCTTTGGTGTTCCAACTGCCTTGCCAGAAGAA
+AAGACTTTTATCAATAACGTAATCAACGTTTATCACTGA
+>AGP1 1902 residues Pha 0 Code 0
+ATGTCGTCGTCGAAGTCTCTATACGAACTGAAAGACTTGAAAAATAGCTCCACAGAAATA
+CATGCCACGGGGCAGGATAATGAAATTGAATATTTCGAAACAGGCTCCAATGACCGTCCA
+TCCTCACAACCTCATTTAGGTTACGAACAGCATAACACTTCTGCCGTGCGTAGGTTTTTC
+GACTCCTTTAAAAGAGCGGATCAGGGTCCACAGGATGAAGTAGAAGCAACACAAATGAAC
+GATCTTACGTCGGCTATCTCACCTTCTTCTAGACAGGCTCAAGAACTAGAAAAAAATGAA
+AGTTCGGACAACATAGGCGCTAATACAGGTCATAAGTCGGACTCGCTGAAGAAAACCATT
+CAGCCTAGACATGTTCTGATGATTGCGTTGGGTACGGGTATCGGTACTGGGTTACTGGTC
+GGTAACGGTACCGCGTTGGTTCATGCGGGTCCAGCTGGACTACTTATTGGTTACGCTATT
+ATGGGTTCTATCTTGTACTGTATTATTCAAGCATGTGGTGAAATGGCGCTAGTGTATAGT
+AACTTGACTGGTGGCTACAATGCATACCCAGTTTCCTTGTGGATGATGGTTTTTGGGTTT
+GCAGTCGCTTGGGTTTATTGTTTGCAATGGCTGTGTGTGTGTCCTCTGGAATTGGTGACC
+GCATCCATGACTATCAAATATTGGACGACATCTGTGAACCCGGATGTGTTCGTCATTATT
+TTCTATGTTTTGGTGATTACTATTAATATTTTCGGTGCTCGTGGTTATGCAGAAGCTGAG
+TTCTTCTTCAACTGTTGCAAAATTTTGATGATGACTGGGTTCTTCATTCTTGGTATTATC
+ATCGATGTTGGTGGCGCTGGTAATGATGGTTTTATTGGTGGTAAATACTGGCACGATCCG
+GGCGCTTTCAATGGTAAACATGCCATTGACAGATTTAAAGGTGTTGTTGCAACATTAGTG
+ACTGCTGCTTTTGCCTTTGGTGGTTCAGAGTTTATTGCCATCACCACTGCAGAACAATCT
+AATCCAAGAAAGGCCATTCCAGGTGCGGCCAAACAAATGATCTACAGAATCTTATTCCTA
+TTCTTGGCTACCATTATTCTACTGGGTTTCTTGGTGCCATACAATTCCGATCAATTATTG
+GGTTCTACCGGTGGTGGTACTAAAGCCTCGCCATATGTCATTGCTGTTGCATCCCACGGT
+GTCCGTGTCGTCCCACACTTCATTAACGCCGTTATTCTACTTTCCGTGCTGTCCATGGCT
+AACTCCTCCTTCTACTCCAGTGCTCGTTTATTTTTAACTCTATCCGAGCAAGGTTACGCT
+CCTAAGGTTTTCTCCTACATCGACAGAGCCGGTAGACCATTGATTGCCATGGGTGTTTCT
+GCATTGTTTGCCGTTATTGCCTTCTGTGCTGCATCTCCCAAGGAAGAACAAGTTTTCACT
+TGGTTATTGGCCATTTCTGGTTTGTCTCAGCTTTTCACATGGACTGCCATTTGTTTATCC
+CATCTTAGATTTAGAAGAGCCATGAAAGTCCAAGGGAGATCTCTTGGAGAATTGGGTTTC
+AAATCTCAAACTGGTGTTTGGGGATCTGCCTACGCTTGCATTATGATGATTTTAATTCTT
+ATTGCCCAATTTTGGGTCGCTATCGCCCCCATTGGTGAAGGTAAGCTGGATGCACAAGCC
+TTTTTCGAAAACTACTTGGCTATGCCAATCTTGATTGCACTATATGTCGGCTACAAGGTC
+TGGCACAAGGATTGGAAACTGTTCATCAGGGCCGACAAGATCGACCTAGATTCTCATAGA
+CAAATCTTTGATGAAGAATTAATCAAGCAAGAAGACGAAGAATATAGGGAACGTTTGAGG
+AACGGACCTTATTGGAAAAGGGTCGTTGCCTTCTGGTGTTAA
+>LEU2 1095 residues Pha 0 Code 0
+ATGTCTGCCCCTAAGAAGATCGTCGTTTTGCCAGGTGACCACGTTGGTCAAGAAATCACA
+GCCGAAGCCATTAAGGTTCTTAAAGCTATTTCTGATGTTCGTTCCAATGTCAAGTTCGAT
+TTCGAAAATCATTTAATTGGTGGTGCTGCTATCGATGCTACAGGTGTCCCACTTCCAGAT
+GAGGCGCTGGAAGCCTCCAAGAAGGTTGATGCCGTTTTGTTAGGTGCTGTGGGTGGTCCT
+AAATGGGGTACCGGTAGTGTTAGACCTGAACAAGGTTTACTAAAAATCCGTAAAGAACTT
+CAATTGTACGCCAACTTAAGACCATGTAACTTTGCATCCGACTCTCTTTTAGACTTATCT
+CCAATCAAGCCACAATTTGCTAAAGGTACTGACTTCGTTGTTGTCAGAGAATTAGTGGGA
+GGTATTTACTTTGGTAAGAGAAAGGAAGACGATGGTGATGGTGTCGCTTGGGATAGTGAA
+CAATACACCGTTCCAGAAGTGCAAAGAATCACAAGAATGGCCGCTTTCATGGCCCTACAA
+CATGAGCCACCATTGCCTATTTGGTCCTTGGATAAAGCTAATGTTTTGGCCTCTTCAAGA
+TTATGGAGAAAAACTGTGGAGGAAACCATCAAGAACGAATTCCCTACATTGAAGGTTCAA
+CATCAATTGATTGATTCTGCCGCCATGATCCTAGTTAAGAACCCAACCCACCTAAATGGT
+ATTATAATCACCAGCAACATGTTTGGTGATATCATCTCCGATGAAGCCTCCGTTATCCCA
+GGTTCCTTGGGTTTGTTGCCATCTGCGTCCTTGGCCTCTTTGCCAGACAAGAACACCGCA
+TTTGGTTTGTACGAACCATGCCACGGTTCTGCTCCAGATTTGCCAAAGAATAAGGTCAAC
+CCTATCGCCACTATCTTGTCTGCTGCAATGATGTTGAAATTGTCATTGAACTTGCCTGAA
+GAAGGTAAGGCCATTGAAGATGCAGTTAAAAAGGTTTTGGATGCAGGTATCAGAACTGGT
+GATTTAGGTGGTTCCAACAGTACCACGGAAGTCGGTGATGCTGTCGCCGAAGAAGTTAAG
+AAAATCCTTGCTTAA
+>NFS1 1494 residues Pha 0 Code 0
+ATGTTGAAATCAACTGCTACAAGATCGATAACAAGATTATCTCAAGTTTACAACGTTCCA
+GCGGCCACATATAGGGCTTGTTTGGTAAGCAGGAGATTCTATTCCCCTCCTGCAGCAGGC
+GTGAAGTTAGACGACAACTTCTCTCTGGAAACGCATACCGATATTCAGGCTGCTGCAAAG
+GCACAGGCTAGTGCCCGTGCGAGTGCATCCGGTACCACCCCAGATGCTGTAGTAGCTTCT
+GGTAGCACTGCAATGAGCCATGCTTATCAAGAAAACACAGGTTTTGGTACTCGTCCCATA
+TATCTTGACATGCAAGCCACTACACCAACAGACCCTAGGGTTTTGGATACGATGTTGAAG
+TTTTATACGGGACTTTATGGTAATCCTCATTCCAACACTCACTCTTACGGTTGGGAAACA
+AATACTGCTGTGGAAAATGCTAGAGCTTACGTAGCAAAGATGATCAATGCCGACCCCAAG
+GAAATAATATTCACTTCGGGAGCGACCGAATCTAATAATATGGTTCTTAAGGGTGTCCCA
+AGATTTTATAAGAAGACTAAGAAACACATCATCACCACTAGAACGGAACACAAGTGTGTC
+TTGGAAGCCGCACGGGCCATGATGAAGGAGGGATTTGAAGTCACTTTCCTAAATGTGGAC
+GATCAAGGTCTTATCGATTTGAAGGAATTGGAAGATGCCATTAGACCAGATACCTGTCTC
+GTCTCTGTGATGGCTGTCAATAATGAAATCGGTGTCATTCAACCTATTAAAGAAATTGGT
+GCAATTTGTAGAAAGAATAAGATCTACTTTCATACTGACGCCGCACAAGCCTATGGTAAG
+ATTCACATTGATGTCAATGAAATGAACATTGATTTACTATCAATTTCTTCTCACAAGATT
+TACGGTCCAAAGGGAATAGGTGCCATCTATGTAAGAAGGAGACCAAGAGTTAGATTAGAA
+CCTTTACTATCCGGTGGTGGCCAAGAGAGAGGATTGAGATCTGGTACTTTGGCCCCCCCA
+TTGGTAGCGGGATTTGGTGAAGCTGCGAGATTGATGAAGAAAGAATTTGACAACGACCAA
+GCTCACATCAAAAGACTATCCGATAAATTAGTCAAAGGTCTATTATCCGCTGAACATACC
+ACGTTGAACGGATCTCCAGATCATCGTTATCCAGGGTGTGTTAACGTTTCTTTCGCCTAC
+GTGGAAGGAGAATCTTTATTGATGGCACTAAGGGATATCGCATTATCCTCGGGTTCAGCC
+TGTACATCTGCTTCCCTAGAACCTTCTTATGTTTTACATGCGCTGGGTAAGGATGATGCA
+TTAGCCCATTCTTCCATCAGATTTGGTATTGGTAGATTTAGTACTGAAGAGGAGGTCGAC
+TACGTCGTTAAGGCCGTTTCTGACAGAGTAAAATTCTTGAGGGAACTTTCACCATTATGG
+GAAATGGTTCAAGAAGGTATTGACTTAAACTCCATCAAATGGTCAGGTCATTGA
+>BUD3 4104 residues Pha 0 Code 0
+ATGGAGAAAGACCTGTCGTCTCTTTACTCTGAAAAGAAAGACAAAGAGAACGATGAAACC
+TTATTTAACATCAAACTATCCAAATCTGTTGTCGAGACCACACCGCTAAATGGTCATTCA
+TTGTTTGATGATGATAAATCACTTTCAGACTGGACGGATAATGTGTTCACTCAATCAGTA
+TTCTATCACGGGTCAGATGACTTGATATGGGGGAAGTTCTTTGTCTGCGTGTACAAGTCC
+CCCAACAGCAATAAGTTGAACGCTATAATATTCGACAAATTAGGAACATCATGCTTCGAA
+TCCGTCGATATATCTTCCAACTCGCAATACTATCCGGCCATTGAGAATTTGAGTCCAAGT
+GATCAGGAAAGCAATGTTAAGAAATGCATTGCTGTCATTCTGTTACAGCGCTATCCATTA
+CTTTCACCATCAGACTTATCACAAATATTGTCCAATAAATCGGAAAATTGCGACTATGAC
+CCCCCTTATGCTGGAGATTTGGCTAGTAGTTGCCAGTTGATAACAGCAGTTCCTCCAGAA
+GATCTGGGGAAGCGCTTCTTTACATCAGGACTTCTGCAAAATAGATTTGTCAGCTCTACC
+CTGTTAGATGTTATTTATGAAAACAATGAATCCACCATCGAACTAAATAATAGGTTGGTA
+TTCCATCTGGGTGAACAACTTGAACAACTTTTTAACCCAGTCACAGAATACTCACCGGAA
+CAGACAGAATATGGTTATAAGGCGCCAGAGGACGAATTACCCACAGAATCGGATGATGAT
+CTTGTCAAGGCCATTTGCAACGAGTTATTACAACTACAAACAAATTTTACTTTCAATTTG
+GTAGAATTTTTGCCAAAATTCCTGATCGCCTTGAGAGTCAGAGTACTCAATGAAGAAATT
+AATGGGTTATCCACAACCAAATTAAATCGACTCTTCCCACCTACAATAGATGAAGTCACA
+AGAATCAATTGTATTTTTCTAGACTCGCTAAAGACAGCAATCCCTTACGGTTCCCTCGAA
+GTACTGAAGGCATGCAGCATTACTATTCCTTATTTCTACAAAGCATATACAAGACACGAG
+GCGGCCACAAAGAACTTCAGCAAAGATATTAAATTGTTTATTAGGCATTTCAGCAATGTA
+ATTCCAGAAAGAGAGGTCTACACGGAAATGAAAATCGAGAGTATAATTAAGGGACCTCAG
+GAAAAACTACTGAAGCTAAACTTAATTATAGAGAGATTGTGGAAGTCGAAAAAATGGAGA
+CCGAAAAATCAAGAAATGGCAAAAAAATGCTACAACAATATCATTGATGTCATTGATTCG
+TTTGGAAAATTAGATTCCCCACTTCATTCTTATAGTACCAGAGTATTTACTCCATCGGGA
+AAAATCCTTACAGAATTAGCCAAATGCTGGCCCGTAGAACTGCAATACAAATGGCTGAAG
+AGAAGGGTAGTCGGTGTGTATGATGTAGTGGATTTGAATGATGAAAATAAGAGAAATTTA
+TTAGTCATATTCAGTGATTATGTGGTTTTCATCAATATACTGGAGGCAGAAAGTTACTAC
+ACTTCAGATGGATCAAACAGGCCCTTAATCTCAGATATTTTAATGAACTCATTGATCAAC
+GAAGTTCCGTTGCCCTCCAAGATCCCTAAGTTGAAAGTGGAGCGTCATTGCTATATAGAT
+GAGGTTCTAGTTTCTATATTAGACAAAAGCACTCTACGTTTTGATCGATTGAAGGGAAAA
+GATTCTTTCTCAATGGTATGTAAATTATCCTCTGCATTTATCTCTTCTTCGTCAGTTGCT
+GACTTGATTACGAAGGCTAGAATTTTGGAAAAAGACACTGCATTTCATTTATTTAAAGCT
+AGTAGAAGCCATTTTACATTATATTCTACTGCTCACGAGCTTTGCGCTTATGATTCCGAA
+AAAATAAAATCAAAATTTGCCTTATTCCTGAACATACCACCATCCAAGGAGATATTGGAG
+GTCAACAACCTTCATTTGGCTTTTTTTGCAAGATTTTGCAGTAACGATGGTAGAGATAAC
+ATCGTAATCTTAGACGTCTTAACCAAACATGACGATAAACATATAGAAGTTACATCCGAT
+AACATTGTTTTCACCATAATTAATCAATTGGCCATTGAAATACCGATATGCTTTTCTTCC
+TTAAACTCATCGATGGCCAAAGATTTACTCTGTGTAAATGAGAATTTGATAAAAAACTTA
+GAACATCAATTGGAAGAGGTCAAGCACCCTTCAACAGACGAACATAGGGCTGTTAATAGC
+AAACTTTCCGGTGCATCCGATTTCGATGCTACTCACGAGAAGAAAAGATCATACGGTACC
+ATAACAACATTTAGAAGCTATACAAGCGACTTGAAGGACAGTCCATCAGGCGATAATAGT
+AATGTCACCAAGGAAACTAAGGAAATTTTACCAGTGAAACCTACGAAAAAGTCTTCAAAA
+AAACCAAGAGAAATTCAAAAGAAGACCAAGACAAACGCCTCTAAAGCAGAGCACATAGAA
+AAGAAGAAGCCTAACAAAGGCAAAGGGTTTTTTGGCGTGTTAAAAAATGTTTTTGGAAGT
+AAAAGCAAGAGCAAGCCTTCACCAGTTCAAAGAGTGCCTAAAAAAATATCGCAGAGGCAT
+CCTAAGTCTCCAGTGAAGAAGCCAATGACCTCAGAAAAGAAATCCTCCCCTAAAAGGGCA
+GTCGTTTCATCTCCCAAAATTAAAAAGAAAAGTACTTCTTTTTCCACAAAAGAATCACAA
+ACTGCTAAATCTTCTCTTCGAGCAGTTGAATTCAAATCTGATGACTTGATCGGAAAACCA
+CCTGATGTTGGAAATGGCGCACATCCTCAAGAAAATACCAGAATATCTTCAGTAGTAAGG
+GATACAAAATATGTCTCCTACAATCCCTCTCAGCCTGTGACAGAAAATACCAGTAACGAA
+AAAAATGTCGAACCAAAAGCGGATCAATCCACAAAGCAGGATAACATTTCCAATTTTGCA
+GATGTAGAGGTATCTGCGTCTTCTTATCCTGAAAAACTTGATGCAGAAACAGATGATCAA
+ATAATTGGGAAGGCGACGAATTCGTCATCAGTTCATGGAAATAAAGAGCTGCCAGACCTT
+GCTGAGGTGACTACAGCAAATAGGGTTTCTACAACATCGGCTGGGGACCAACGTATTGAT
+ACCCAAAGCGAATTTTTACGTGCAGCTGATGTTGAAAACTTAAGTGATGACGATGAACAC
+AGACAGAATGAAAGTAGAGTTTTTAACGATGACCTCTTTGGTGATTTTATTCCTAAGCAT
+TACCGTAATAAACAGGAGAACATTAACAGCTCGAGTAATTTGTTTCCAGAGGGAAAGGTG
+CCCCAAGAAAAGGGCGTATCAAATGAAAACACTAACATATCTCTCAAAACTAATGAAGAT
+GCATCTACATTGACGCAGAAACTCTCTCCACAAGCGAGTAAAGTGCTGACAGAAAATTCT
+AATGAATTAAAAGATACCAACAATGAAGGGAAGGACGCAAAGGACATAAAATTAGGAGAT
+GATTACAGTGATAAAGAAACAGCGAAAGAAATAACTAAACCAAAAAATTTTGTTGAAGGA
+ATAACTGAACGGAAAGAAATATTCCCCACTATTCCTAGGTTAGCGCCGCCAGCTTCAAAA
+ATTAACTTTCAAAGGTCACCATCCTATATTGAGCTCTTTCAAGGAATGAGGGTGGTTTTA
+GATAAGCATGATGCCCATTATAACTGGAAACGCTTGGCTAGTCAAGTCTCCTTAAGTGAG
+GGACTAAAAGTCAATACTGAGGAAGATGCGGCAATTATAAATAAAAGTCAGGATGATGCC
+AAGGCGGAAAGAATGACTCAAATTTCTGAAGTGATTGAGTATGAAATGCAGCAACCTATC
+CCAACTTATTTGCCTAAGGCGCATCTAGATGACTCGGGTATTGAAAAAAGTGATGACAAA
+TTCTTCGAAATTGAAGAAGAACTTAAGGAAGAATTGAAGGGCAGCAAAACGGTAATGAAG
+ATGTCGGTAATAATAATCCATCCAATTCTATTCCAAAAATCGAGAAGCCCCCAGCATTCA
+AAGTTATTAGAACATCGCCTGTGA
+>GBP2 1284 residues Pha 0 Code 0
+ATGGAGAGAGAGCTAGGGATGTATGGAAATGATAGGAGTAGATCAAGATCACCTGTACGT
+CGTCGTTTGAGCGACGACAGAGACAGGTACGATGATTATAACGATAGTAGCAGTAATAAT
+GGTAATGGCAGTCGTCGTCAGAGACGCGACCGAGGCTCCCGTTTCAATGATCGGTACGAT
+CAGAGTTATGGTGGCAGCCGCTACCACGATGATAGGAACTGGCCCCCTCGCCGAGGAGGC
+CGTGGCAGAGGAGGAAGCAGATCATTCAGAGGGGGACGCGGTGGCGGTAGGGGTCGTACT
+TTAGGTCCAATTGTTGAAAGAGACTTAGAAAGGCAATTTGACGCGACCAAGAGAAATTTT
+GAAAATAGTATCTTCGTGAGAAACTTGACTTTTGATTGTACCCCTGAAGACCTTAAGGAA
+TTGTTTGGTACAGTGGGCGAAGTTGTGGAGGCTGACATTATCACATCAAAGGGCCATCAC
+CGTGGTATGGGGACTGTGGAATTTACCAAAAACGAATCTGTCCAAGATGCCATATCGAAG
+TTTGATGGTGCCCTCTTTATGGACCGGAAACTAATGGTAAGACAGGATAATCCTCCTCCT
+GAAGCTGCCAAGGAATTTTCTAAGAAAGCTACTAGGGAAGAAATAGATAATGGGTTTGAA
+GTGTTCATCATCAATTTACCGTACTCTATGAATTGGCAATCCTTAAAAGATATGTTTAAA
+GAATGTGGTCATGTCTTGCGTGCCGATGTAGAATTGGATTTCAACGGATTTTCAAGAGGA
+TTCGGTTCTGTCATTTATCCTACTGAGGATGAAATGATTAGAGCTATCGATACATTCAAC
+GGCATGGAAGTAGAAGGTAGAGTTTTGGAAGTTAGAGAAGGGCGTTTCAACAAGAGAAAG
+AACAATGATCGTTATAATCAAAGGCGTGAGGACCTTGAAGATACCAGAGGTACTGAACCA
+GGTCTTGCGCAGGATGCCGCTGTCCACATTGATGAAACTGCAGCAAAATTTACTGAAGGT
+GTCAATCCAGGAGGGGATAGAAACTGTTTCATTTATTGTAGTAATTTACCATTCTCAACA
+GCAAGAAGCGATTTATTCGACTTGTTTGGGCCTATCGGCAAAATCAATAACGCGGAATTG
+AAACCACAGGAAAATGGTCAACCAACTGGTGTTGCTGTTGTAGAATATGAAAATTTAGTA
+GATGCAGATTTTTGTATTCAAAAATTAAATAATTATAATTATGGTGGTTGTAGTTTACAG
+ATCTCTTATGCTAGACGTGATTAA
+>ILV6 930 residues Pha 0 Code 0
+ATGCTGAGATCGTTATTGCAAAGCGGCCACCGCAGGGTGGTTGCTTCTTCATGTGCTACC
+ATGGTGCGTTGCAGTTCCTCGTCGACCTCCGCGTTGGCGTACAAGCAGATGCACAGACAC
+GCAACAAGACCTCCCTTGCCCACACTAGACACTCCTTCCTGGAATGCCAACAGTGCCGTT
+TCATCCATCATTTACGAAACACCAGCGCCTTCTCGTCAACCAAGAAAACAGCATGTCTTG
+AACTGTTTGGTGCAAAACGAACCCGGTGTCTTGTCCAGAGTCTCGGGTACGTTAGCTGCC
+AGAGGCTTTAACATCGATTCGTTGGTCGTGTGCAACACCGAGGTCAAAGACCTAAGTAGA
+ATGACCATTGTTTTGCAAGGGCAAGATGGCGTAGTCGAACAAGCACGCAGACAAATCGAA
+GACTTGGTCCCCGTCTACGCCGTCCTAGACTATACCAATTCTGAGATCATCAAAAGAGAG
+CTAGTGATGGCCAGAATCTCTCTATTGGGTACTGAATACTTCGAAGACCTACTATTGCAC
+CACCACACTTCCACCAATGCTGGCGCCGCTGACTCCCAAGAATTGGTCGCCGAAATCAGA
+GAAAAGCAATTCCACCCTGCCAACTTGCCCGCCAGTGAGGTATTAAGGTTGAAGCACGAG
+CATTTGAACGATATCACCAACTTGACCAACAACTTTGGAGGTCGTGTCGTCGACATCAGC
+GAAACAAGCTGTATTGTGGAATTGTCTGCAAAACCCACACGTATCTCTGCCTTCTTGAAG
+TTGGTCGAGCCATTCGGTGTCCTAGAGTGTGCAAGAAGCGGTATGATGGCATTGCCAAGA
+ACTCCTTTGAAGACAAGCACCGAGGAAGCTGCCGACGAAGACGAAAAGATCAGCGAAATC
+GTCGACATTTCCCAACTACCACCTGGTTAG
+>CWH36 393 residues Pha 0 Code 0
+ATGGAGCTGGCAAAGGAACGTAATGGCCCACATCAAAAACATCATGGCCAATGTCAAAAT
+CACTGTACTTCTCCAAACACTGTACGACAAAACAAAACAAACAAACTCTTGTTAGTAAAA
+AAGAAAGGGAAACTAGTAATATGGAGACACATCGTAAAAAAAATGTTGCACATACGCTTG
+GTTGTTCTTTGGAGCCATTATCCAGAACAGCACGGACATGGCACTAACCACTATGAATAC
+ACCAACAACAGTATAGCTAAATTGGACGCGCAGAGAGTTAGTAGAAGAAGAAGGAAGAAA
+AGGGAAGCGGAGAGAAGAGATTATGACACATACAAACTACTCATTACTCTTTGTTCTTTA
+TTATTCGTTGGACCTTTGTTTCTTAAAGTATAG
+>PEL1 1251 residues Pha 0 Code 0
+ATGACGACTCGTTTGCTCCAACTCACTCGTCCTCATTACAGATTATTATCCCTACCTCTC
+CAGAAACCCTTCAATATAAAAAGGCAGATGTCCGCTGCGAACCCTTCTCCATTTGGCAAT
+TATTTGAACACGATCACTAAGTCCCTACAACAGAATTTACAAACATGCTTTCATTTCCAA
+GCAAAAGAAATCGATATAATCGAATCTCCATCTCAGTTTTACGATCTCTTGAAGACAAAA
+ATACTTAATTCACAAAATAGAATATTCATTGCGTCTCTGTATTTAGGCAAAAGCGAGACT
+GAGTTGGTGGACTGCATATCCCAGGCATTGACCAAGAACCCCAAGTTGAAAGTTTCTTTT
+CTACTTGATGGCCTTCGAGGAACAAGAGAATTGCCTTCCGCCTGTTCCGCCACTTTATTA
+TCGTCTTTAGTAGCCAAATATGGGTCAGAGAGAGTGGATTGCCGATTGTACAAGACGCCT
+GCTTATCATGGTTGGAAAAAAGTCTTGGTTCCCAAGAGATTTAATGAAGGTTTAGGCTTA
+CAACATATGAAAATATATGGGTTTGATAACGAGGTCATTCTTTCGGGAGCCAACCTTTCG
+AACGACTATTTCACCAACAGACAAGATAGATACTATCTCTTTAAATCTCGAAACTTCTCC
+AACTATTATTTTAAATTACATCAACTCATAAGTTCCTTCAGTTATCAGATTATAAAGCCA
+ATGGTGGATGGTAGCATCAACATCATTTGGCCAGATTCGAATCCTACTGTTGAACCGACG
+AAAAATAAAAGGCTGTTTTTAAGGGAAGCATCTCAATTACTAGATGGCTTTTTAAAGAGT
+TCTAAACAAAGCCTCCCGATTACTGCCGTGGGTCAATTCTCCACATTAGTTTACCCAATT
+TCTCAATTCACTCCACTTTTTCCCAAATATAATGACAAATCGACCGAAAAAAGAACAATA
+TTGTCATTGCTTTCCACTATAACAAGCAATGCCATTTCTTGGACGTTCACTGCAGGATAC
+TTCAATATTTTGCCAGACATCAAAGCAAAACTGCTGGCAACGCCGGTTGCTGAGGCAAAT
+GTAATAACAGCTTCCCCCTTTGCAAACGGCTTTTACCAATCAAAGGGCGTCTCATCAAAT
+TTACCTGGTGCTTACTTGTACCTGTCAAAAAAATTTCTACAAGATGTATGTAGGTACAGA
+CAAGATCATGCTATTACCATTAAGAGAATGGCAAAGAGGCGTAGTAAATAA
+>RER1 567 residues Pha 0 Code 0
+ATGGATTACGATAGCTCTGATACAATGAACGGTGGTTCAAGTAACCCCTTAATCACTAAG
+ATGAATACAATGAAATTATTATATCAACACTATTTGGATAAAGTCACTCCTCACGCTAAG
+GAGAGGTGGGCTGTATTGGGTGGTTTGTTATGTTTGTTTATGGTTCGTATTACAATGGCC
+GAAGGCTGGTATGTGATTTGTTATGGTCTAGGTCTATTTTTATTGAATCAATTTTTAGCC
+TTTTTGACCCCAAAATTCGATATGTCCTTACAGCAAGATGAAGAAAACAACGAATTGGAA
+GCTGGAGAAAAATCAGAAGAATTCCGTCCATTCATCAGAAGATTACCAGAGTTCAAATTC
+TGGTATAACAGCATTAGAGCCACTGTCATTTCCCTCTTGTTGTCGCTATTTTCAATCTTC
+GATATTCCAGTATTTTGGCCCATCTTATTGATGTATTTCATATTATTGTTTTTTTTAACT
+ATGAGAAGGCAGATTCAACATATGATAAAATATAGATATATACCCTTAGATATCGGTAAG
+AAGAAATATTCTCATTCTTCTAACTGA
+>CDC10 969 residues Pha 0 Code 0
+ATGGATCCTCTCAGCTCAGTACAGCCTGCTTCTTATGTTGGTTTTGATACCATCACGAAT
+CAGATCGAACATCGTCTGTTGAAGAAAGGTTTTCAATTTAATATAATGGTTGTTGGCCAA
+TCCGGATTGGGTAAAAGTACTCTAATAAATACGTTATTTGCCTCACATTTGATTGATTCT
+GCTACTGGTGATGATATTTCTGCCCTGCCTGTTACAAAAACAACTGAAATGAAAATTTCT
+ACTCATACTCTTGTGGAGGACCGCGTTCGCTTGAATATTAATGTTATAGATACACCTGGA
+TTTGGTGACTTTATTGACAATTCTAAAGCTTGGGAGCCTATTGTGAAGTACATTAAGGAA
+CAACATTCTCAATACTTACGTAAAGAATTGACAGCCCAACGTGAAAGGTTTATTACTGAT
+ACAAGAGTTCATGCAATTCTTTATTTCCTGCAACCAAATGGAAAGGAGTTGAGCCGCCTT
+GACGTTGAAGCCTTGAAAAGATTGACAGAAATAGCAAATGTTATACCAGTTATTGGCAAG
+TCGGATACATTGACTTTAGATGAAAGAACGGAGTTTAGGGAGCTTATTCAAAATGAATTC
+GAAAAATACAATTTCAAGATTTATCCTTATGATTCGGAAGAACTAACTGACGAGGAATTA
+GAACTAAACAGAAGTGTTAGATCTATCATTCCGTTTGCAGTGGTTGGTTCTGAGAATGAG
+ATTGAAATAAACGGTGAAACCTTCAGGGGAAGAAAAACTCGTTGGAGCGCTATTAATGTT
+GAGGATATCAACCAGTGTGATTTTGTATATTTAAGGGAATTTTTGATTCGAACTCATCTC
+CAAGACTTAATCGAAACAACTTCCTACATTCATTATGAAGGGTTCAGAGCAAGACAATTA
+ATTGCCTTGAAAGAAAATGCGAATAGTCGTTCCTCAGCTCATATGTCTAGCAACGCCATT
+CAACGTTGA
+>MRPL32 552 residues Pha 0 Code 0
+ATGAATTCTTTGATTTTTGGTAAACAATTAGCATTTCACAAAATTGTGCCTACCACTGCA
+ATTGGGTGGTTGGTACCGCTAGGAAATCCTTCACTGCAGATTCCAGGCCAAAAACAACTG
+GGATCTATCCACCGTTGGTTGAGAGAAAAGCTACAACAAGATCATAAGGACACTGAAGAT
+AAAGATTTTTTCTCTAATAATGGTATTCTACTAGCAGTTCCTAAAAAAAAAGTATCACAC
+CAAAAAAAAAGGCAAAAACTTTACGGTCCAGGTAAGAAGCAATTGAAGATGATTCACCAT
+TTGAATAAGTGCCCATCATGCGGCCATTATAAGAGAGCCAATACACTGTGTATGTATTGT
+GTTGGACAAATAAGTCATATATGGAAAACGCATACCGCTAAAGAAGAAATTAAGCCGAGA
+CAAGAGGAGGAACTTTCCGAACTAGACCAAAGAGTCCTATATCCTGGTAGAAGAGATACC
+AAATATACCAAGGATTTGAAAGATAAAGATAACTATTTGGAACGTCGCGTTCGGACTTTA
+AAAAAGGACTAG
+>YCP4 744 residues Pha 0 Code 0
+ATGGTAAAGATTGCGATAATTACTTACTCTACCTACGGGCACATAGACGTTTTAGCCCAA
+GCTGTTAAGAAAGGTGTGGAGGCAGCTGGTGGTAAAGCTGATATATACAGGGTCGAGGAA
+ACTTTACCTGATGAAGTCCTCACCAAGATGAACGCTCCTCAGAAACCTGAAGATATTCCT
+GTTGCCACTGAGAAAACGTTGCTCGAATATGACGCCTTTTTGTTCGGTGTTCCAACTAGG
+TTTGGTAATTTGCCGGCTCAATGGTCCGCCTTTTGGGATAAAACCGGTGGATTATGGGCC
+AAGGGCTCTTTGAACGGCAAAGCTGCGGGGATATTCGTTAGTACTTCCAGTTACGGAGGT
+GGTCAAGAAAGTACCGTTAAAGCCTGTTTGTCTTATTTAGCTCATCACGGAATTATCTTT
+TTACCACTGGGTTATAAGAATTCATTTGCTGAGTTAGCCAGTATAGAAGAGGTACACGGT
+GGCTCTCCATGGGGTGCTGGTACCCTTGCAGGACCTGACGGCTCAAGAACTGCGTCTCCA
+CTTGAATTGAGAATTGCTGAAATTCAAGGTAAAACATTCTACGAAACCGCCAAAAAACTT
+TTCCCTGCAAAAGAAGCCAAGCCCTCCACTGAAAAGAAGACCACTACTTCTGATGCGGCT
+AAGAGACAAACTAAACCTGCAGCAGCTACAACTGCAGAAAAGAAGGAGGACAAAGGATTA
+TTATCCTGCTGTACTGTCATGTAA
+>CIT2 1383 residues Pha 0 Code 0
+ATGACAGTTCCTTATCTAAATTCAAACAGAAATGTTGCATCATATTTACAATCAAATTCA
+AGCCAAGAAAAGACTCTAAAAGAGAGATTTAGCGAAATCTACCCCATCCATGCTCAAGAT
+GTAAGGCAATTCGTTAAAGAGCATGGCAAAACTAAAATTAGCGATGTTCTATTAGAACAG
+GTATATGGTGGTATGAGAGGTATTCCAGGGAGCGTATGGGAAGGTTCCGTTTTGGACCCA
+GAAGACGGTATTCGTTTCAGAGGTCGTACGATCGCCGACATTCAAAAGGACCTGCCCAAG
+GCAAAAGGAAGCTCACAACCACTACCAGAAGCTCTCTTTTGGTTATTGCTAACTGGCGAG
+GTTCCAACTCAAGCGCAAGTTGAAAACTTATCAGCTGATCTAATGTCAAGATCGGAACTA
+CCTAGTCATGTCGTTCAACTTTTGGATAATTTACCAAAGGACTTACACCCAATGGCTCAA
+TTCTCTATTGCTGTAACTGCCTTGGAAAGCGAGTCAAAGTTTGCTAAGGCTTATGCTCAA
+GGAATTTCCAAGCAAGATTATTGGAGTTATACTTTTGAAGATTCACTAGACTTGCTGGGT
+AAATTGCCAGTTATTGCAGCTAAAATTTATCGTAATGTATTCAAAGATGGCAAAATGGGT
+GAAGTGGACCCAAATGCCGATTATGCTAAAAATCTGGTCAACTTGATTGGTTCTAAGGAT
+GAAGATTTCGTGGACTTGATGAGACTTTATTTAACCATTCATTCGGATCACGAAGGTGGT
+AATGTATCTGCACATACATCCCATCTTGTGGGCTCAGCACTATCATCACCTTATCTGTCC
+CTTGCATCAGGTTTGAACGGGTTGGCTGGCCCACTTCATGGGCGTGCTAATCAAGAAGTA
+CTAGAATGGTTATTTGCACTTAAAGAAGAGGTAAATGATGACTACTCTAAAGATACGATC
+GAAAAATATTTATGGGATACTCTAAACTCAGGAAGAGTCATTCCCGGTTATGGTCATGCT
+GTGCTAAGGAAAACTGATCCTCGTTATATGGCTCAGCGTAAGTTTGCCATGGACCATTTT
+CCAGATTATGAATTATTCAAGTTAGTTTCATCAATATACGAGGTAGCACCTGGCGTATTG
+ACTGAACATGGTAAAACTAAAAATCCATGGCCAAATGTAGATGCTCACTCTGGTGTCTTA
+TTACAATATTATGGACTAAAAGAATCTTCTTTCTATACCGTTTTATTTGGCGTTTCAAGG
+GCATTTGGTATTCTTGCTCAATTGATCACTGATAGGGCCATCGGTGCTTCCATTGAAAGG
+CCAAAGTCCTATTCTACTGAGAAATACAAGGAATTGGTCAAAAACATTGAAAGCAAACTA
+TAG
+>YCP7 720 residues Pha 0 Code 0
+ATGCAGCCTCATTTAGACAACAACAGTAATAATGACGATGTCAAATTGGATACATTAGGG
+GAACAAAATGTGTTATCATCCGCAGAAAATATCACTTTACCTGAAGACACCTTTAAATCA
+TATATGACCTACTTGCTGTACGAGATGGCTCATTACAAACCGATGATATTTTCCTTCTTG
+GCACTTTCAGTTTCAATTTTAATAGTTGTGATCTTTCATAATGTTAAAGCTTGTGATGTC
+GTTTTTGGTTTTTCAATTTTCGTCACTTCTATTTTGTTTTTGTCTACGTTGATTCCGTTT
+AATGTGTATATCTCGGATGAGGGTTTCAGAATTAAGCTTTTGCTGGAAGTTATCACCCAC
+AGGCCAGCGGTAAAGGGAAAAGAATGGAGAGCAATCACAGACAATATGAATCAATATTTA
+CTTGATAATGGTTTATGGAGTACTCGCTATTACTTTTATAGTAGTGAAAGATGCTACAAA
+TTCTTCAGATTTCTTGTGAAAGAAAAACCCCCAGGTGTGAATGTAAATTCATCGGTAAAG
+GACGCCACAAGTACGCAGATAGATGCACCAGCAAATGAGGCTTCAAATGAGGTAATAAAA
+TGCTTTAGTTTCAGTTCTGACCCAATATTCGAAGCATACTTTGTTAAAGCAGTAGAAGTT
+GAGAAACAAGCACAACAGGAATATTGGAGAAAGCAATATCCTGACGCCGATATACCATGA
+>SAT4 1812 residues Pha 0 Code 0
+ATGACTGGTATGAATGATAATAATGCCGCTATTCCTCAGCAAACTCCAAGGAAACATGCG
+CTATCTTCTAAAGTTATGCAACTTTTTAGAAGCGGTTCAAGATCATCTAGGCAGGGAAAG
+GCCTCATCGAATATCCAGCCACCTTCTAATATAAACACAAACGTTCCATCGGCGTCTAAA
+TCAGCCAAATTTGGTTTACATACCCCAACCACTGCTACTCCTAGGGTAGTTTCTAATCCT
+TCTAATACTGCAGGTGTGAGTAAACCGGGCATGTATATGCCCGAATATTACCAGTCGGCA
+TCACCATCGCACTCTAGTTCATCCGCATCATTAAACAACCATATTGATATTAACACCTCT
+AAGTCATCATCAGCTGCTTCTTTAACTTCGTCAGTATCAGCTTTATCCTTATCACCCACA
+TCAGCCATAAATATTAGCTCCAAAAGTTTGAGCCCAAAGTTCTCTCATCATAGTAACAGC
+AATACTGCTATTACACCCGCGCCTACTCCCACTGCTTCAAATATTAATAATGTAAATAAG
+ATAACCAATACAAGTGCACCTATTTGTGGGAGGTTTCTTGTGCATAAAGATGGTACCCAT
+GAACATCACTTAAAAAATGCTAAGAGACAAGAAAAGCTAAGCACAATGATTAAAAACATG
+GTTGGTGCGAGCAAATTACGTGGTGAGGCAAAATCTGCTGTCCCTGATATAATAATGGAT
+CCAAAGACGACTTTAAAATCCAACAAGAATCCTCCTACTCTTTTTGCAGGCTTCATGAAG
+CAGGTCGTGGATATGGATGATAAATATCCAGAAGGCGCTCCCACAAGTGGCGCTTTAAAT
+TGTCCTGAAAGGGATATATACAGGTCAGATCAAAAAGATTCCAAAAATAATACGCATAAT
+ATCACTACTACTAAAAAAGATAGGCAATGTTTTGCCGAAAAGTATGGTCGCTGTCAAGAA
+GTCCTTGGTAAAGGTGCTTTTGGTGTAGTAAGAATATGTCAAAAGAAAAATGTTTCTTCT
+CAAGATGGTAATAAAAGTGAAAAGCTTTATGCAGTGAAAGAGTTCAAGCGTAGAACATCC
+GAATCAGCAGAAAAGTATTCTAAGAGGTTGACTTCTGAATTTTGCATTTCTTCTTCATTA
+CACCATACAAATATTGTTACTACACTAGATCTTTTCCAAGATGCCAAAGGCGAGTACTGT
+GAAGTAATGGAATATTGTGCAGGTGGCGATCTATTCACTTTGGTCGTTGCCGCCGGAAAA
+TTAGAATATATGGAAGCAGATTGTTTCTTCAAGCAGCTTATTAGAGGTGTTGTTTATATG
+CATGAAATGGGTGTTTGTCATAGAGATTTGAAGCCTGAGAACTTACTGCTTACGCACGAT
+GGTGTGCTAAAAATTACAGACTTTGGTAACAGCGAATGTTTCAAGATGGCATGGGAAAAA
+AATATTCACCTTAGTGGAGGCGTTTGCGGTTCATCGCCGTACATCGCCCCAGAGGAATAT
+ATCAAAGAAGAGTTTGATCCAAGACCCGTAGATATATGGGCATGTGGTGTCATTTATATG
+GCAATGAGAACTGGTAGACAATTGTGGAGTTCTGCTGAAAAAGACGATCCATTTTATATG
+AATTATTTAAAAGGACGTAAGGAAAAGGGAGGCTATGAGCCAATCGAAAGTTTAAAAAGA
+GCCAGGTGTAGGAATGTTATATATTCGATGTTAGATCCCGTTCCGTACAGAAGAATTAAC
+GGGAAACAAATTTTGAACAGTGAATGGGGAAGGGAGATAAAATGCTGCCATAATGGGCGC
+GCATTGAAATAA
+>RVS161 798 residues Pha 0 Code 0
+ATGAGTTGGGAAGGTTTTAAGAAAGCTATCAACAGAGCTGGTCACAGTGTGATAATTAAG
+AATGTCGACAAGACCATTGATAAAGAGTATGACATGGAAGAACGTCGTTATAAAGTTCTT
+CAAAGAGCAGGTGAGGCATTACAAAAGGAAGCCAAAGGTTTCTTGGACTCATTGAGAGCT
+GTGACAGCATCACAGACTACCATTGCCGAGGTCATCTCTAACCTCTATGACGATTCAAAA
+TATGTTGCTGGTGGTGGTTACAACGTTGGTAACTATTATTTGCAATGTGTTCAAGATTTT
+GATAGCGAAACTGTTAAGCAATTAGACGGGCCCTTAAGAGAAACCGTACTAGATCCAATA
+ACAAAGTTTTCGACGTATTTCAAAGAAATTGAGGAGGCCATAAAAAAGAGAGACCATAAG
+AAACAAGACTTCGATGCTGCGAAGGCAAAAGTTCGTAGATTAGTGGACAAACCTGCTAAA
+GATGCCTCTAAACTGCCAAGGGCTGAAAAAGAATTGAGCTTAGCTAAAGATATTTTCGAA
+AATCTTAATAACCAATTGAAAACTGAACTACCACAGTTAGTTTCATTAAGAGTACCTTAC
+TTTGACCCAAGTTTTGAAGCTTTAATCAAGATTCAGCTAAGGTTCTGTACTGATGGTTAC
+ACTCGTTTAGCGCAGATTCAACAATATTTGGACCAACAATCAAGAGACGACTATGCCAAT
+GGGTTATTAGACACTAAAATCGAAGAACTATTAGGACAAATGACAAGCCTAGATATTTGT
+GCGCTCGGGATAAAATAA
+>YCQ0 852 residues Pha 0 Code 0
+ATGTCTGACAAGGAACAAACGAGCGGAAACACAGATTTGGAGAATGCACCAGCAGGATAC
+TATAGTTCCCATGATAACGACGTTAATGGCGTTGCAGAAGATGAACGTCCATCTCATGAT
+TCGTTGGGCAAGATTTACACTGGAGGTGATAACAATGAATATATCTATATTGGGCGTCAA
+AAGTTTTTGAAGAGCGACTTATACCAAGCCTTTGGTGGTACCTTGAATCCAGGGTTAGCT
+CCTGCTCCAGTGCACAAATTTGCTAATCCTGCGCCCTTAGGTCTTTCAGCCTTCGCGTTG
+ACGACATTTGTGCTGTCCATGTTCAATGCGAGAGCGCAAGGGATCACTGTTCCTAATGTT
+GTCGTCGGTTGTGCTATGTTTTATGGTGGTTTGGTGCAATTGATTGCTGGTATTTGGGAG
+ATAGCTTTGGAAAATACTTTTGGTGGTACCGCATTATGTTCTTACGGTGGGTTTTGGTTG
+AGTTTCGCTGCAATTTACATTCCTTGGTTTGGTATCTTGGAAGCTTACGAAGACAATGAA
+TCTGATTTGAATAATGCTTTAGGATTTTATTTGTTGGGGTGGGCCATCTTTACGTTTGGT
+TTAACCGTTTGTACCATGAAATCCACTGTTATGTTCTTTTTGTTGTTCTTCTTACTAGCA
+TTAACTTTCCTACTGTTGTCTATTGGTCACTTTGCTAATAGACTTGGTGTCACAAGAGCT
+GGTGGTGTCCTGGGAGTTGTTGTTGCTTTCATTGCTTGGTACAACGCATATGCAGGTGTT
+GCTACAAAGCAGAATTCATATGTACTGGCTCGTCCATTCCCATTACCATCTACTGAAAGG
+GTAATCTTTTAA
+>ADP1 3150 residues Pha 0 Code 0
+ATGGGAAGTCATCGACGTTATCTCTACTATAGTATATTATCATTTCTATTATTATCCTGC
+TCAGTGGTACTTGCAAAACAAGATGAGACCCCATTCTTTGAAGGTACTTCTTCGAAAAAT
+TCGCGTCTAACTGCACAAGATAAGGGCAATGATACGTGCCCGCCATGTTTTAATTGTATG
+CTACCTATTTTTGAATGCAAACAGTTTTCTGAATGCAATTCGTACACTGGTAGATGTGAG
+TGTATAGAAGGGTTTGCAGGTGATGATTGCTCTCTGCCCCTCTGTGGCGGTCTATCACCG
+GATGAAAGCGGTAATAAGGATCGTCCCATAAGAGCACAAAATGACACCTGTCATTGTGAT
+AACGGATGGGGAGGGATCAATTGTGACGTTTGTCAAGAAGATTTTGTCTGTGATGCGTTC
+ATGCCTGATCCTAGTATTAAGGGGACATGTTATAAGAATGGTATGATTGTAGATAAAGTA
+TTTTCAGGTTGTAATGTGACCAATGAGAAAATTCTACAGATTTTGAACGGCAAAATACCA
+CAAATTACATTTGCCTGTGATAAACCTAATCAAGAATGTAATTTTCAGTTTTGGATAGAT
+CAGTTAGAAAGCTTCTATTGTGGCTTAAGTGATTGTGCCTTTGAATACGACTTGGAACAG
+AATACCTCCCATTATAAGTGTAATGACGTTCAATGCAAATGCGTTCCCGACACTGTGTTG
+TGTGGTGCTAAGGGGTCTATAGATATCTCGGATTTCCTGACAGAGACAATAAAAGGGCCA
+GGAGATTTCAGCTGTGATTTAGAAACAAGGCAATGTAAATTCAGTGAGCCTTCTATGAAT
+GATTTGATATTGACCGTGTTTGGTGACCCTTATATTACTTTGAAGTGTGAATCCGGTGAA
+TGTGTTCATTATAGTGAGATTCCAGGTTACAAATCTCCTTCAAAAGATCCAACAGTGTCA
+TGGCAAGGGAAATTGGTGTTGGCATTGACTGCTGTGATGGTCCTGGCACTTTTTACATTT
+GCTACCTTTTACATTTCTAAATCTCCGTTATTCAGAAATGGATTGGGTTCCTCAAAGTCT
+CCCATTCGTTTGCCAGATGAAGATGCGGTGAATAATTTCTTACAAAATGAAGATGACACA
+CTGGCGACATTAAGTTTTGAAAATATCACTTATAGTGTCCCCTCGATAAATTCAGATGGT
+GTTGAAGAAACTGTGCTGAATGAAATAAGTGGTATCGTGAAGCCCGGCCAAATATTAGCT
+ATCATGGGTGGATCTGGTGCGGGTAAAACTACTTTATTAGATATCCTAGCAATGAAACGG
+AAAACAGGTCACGTTTCGGGTTCCATAAAAGTTAACGGTATTAGTATGGACCGTAAATCT
+TTCTCGAAAATAATCGGGTTCGTCGATCAAGATGACTTTTTGCTGCCCACTTTGACTGTT
+TTTGAAACCGTATTAAATAGTGCGCTGTTAAGATTGCCAAAAGCATTGTCATTCGAGGCC
+AAGAAGGCAAGAGTTTATAAGGTGTTGGAAGAACTAAGAATTATTGATATCAAAGATCGT
+ATTATTGGTAATGAATTTGATCGTGGTATTAGTGGAGGTGAAAAACGCCGAGTTTCCATT
+GCATGTGAATTAGTGACATCTCCATTGGTTTTATTTTTGGATGAACCTACATCTGGTTTA
+GATGCTAGTAATGCCAATAATGTTATTGAATGTTTGGTAAGGTTATCCAGCGACTATAAC
+AGGACATTGGTGCTATCTATTCATCAGCCAAGATCAAATATATTTTATTTATTCGATAAA
+TTGGTCCTGTTAAGTAAAGGTGAGATGGTCTATTCCGGAAATGCCAAAAAAGTGTCAGAA
+TTTTTGAGAAATGAGGGATATATCTGTCCGGACAACTATAATATTGCTGATTATTTGATT
+GATATTACTTTTGAAGCCGGTCCTCAGGGGAAAAGGAGAAGAATCAGAAACATTTCCGAT
+TTAGAAGCTGGTACGGATACTAACGATATTGATAATACGATACACCAAACAACATTTACT
+AGCAGTGATGGTACAACACAGAGAGAGTGGGCTCATCTTGCAGCTCATAGAGATGAGATC
+AGATCTTTACTCAGAGATGAAGAAGATGTAGAGGGAACAGATGGAAGGCGAGGTGCTACT
+GAGATTGACTTAAATACCAAACTACTACACGATAAATATAAAGATAGCGTCTATTATGCA
+GAGCTTTCACAGGAGATCGAGGAAGTTTTAAGCGAAGGTGATGAGGAAAGTAACGTTTTG
+AATGGAGATTTACCCACAGGTCAACAATCTGCTGGTTTTCTGCAACAGTTATCGATATTG
+AATTCAAGAAGTTTTAAAAACATGTACAGAAACCCTAAACTATTATTGGGTAATTATTTA
+CTGACGATCCTATTGAGTTTATTCTTGGGAACACTATATTACAACGTCTCCAATGATATC
+AGCGGTTTTCAGAACAGAATGGGGCTGTTCTTCTTTATACTAACGTACTTCGGTTTTGTT
+ACATTCACAGGTCTCAGCTCGTTCGCTCTGGAAAGGATCATTTTCATAAAAGAAAGATCC
+AATAACTATTACTCGCCACTTGCATACTACATTAGTAAGATAATGAGCGAAGTGGTCCCG
+CTACGTGTTGTACCACCTATACTCTTGTCATTGATTGTTTACCCAATGACTGGTTTAAAC
+ATGAAAGACAATGCTTTTTTTAAATGTATTGGAATCCTTATACTGTTTAACCTTGGGATA
+TCGTTGGAAATCCTAACCATCGGCATAATTTTTGAAGACTTGAATAACTCCATAATATTA
+AGCGTGCTGGTGCTTTTGGGCTCACTACTGTTTAGCGGACTATTTATCAATACTAAGAAT
+ATTACAAACGTGGCCTTCAAGTACCTGAAAAACTTCTCTGTGTTTTACTACGCCTACGAA
+TCTTTATTGATCAATGAGGTCAAAACATTGATGCTGAAAGAGAGAAAGTACGGCTTAAAT
+ATTGAAGTTCCAGGCGCTACTATCTTGAGCACATTTGGATTTGTTGTCCAAAACCTTGTA
+TTTGACATCAAGATCCTGGCTCTGTTTAATGTGGTGTTTTTAATAATGGGGTATCTAGCC
+CTTAAGTGGATAGTTGTGGAACAAAAGTAG
+>PGK1 1251 residues Pha 0 Code 0
+ATGTCTTTATCTTCAAAGTTGTCTGTCCAAGATTTGGACTTGAAGGACAAGCGTGTCTTC
+ATCAGAGTTGACTTCAACGTCCCATTGGACGGTAAGAAGATCACTTCTAACCAAAGAATT
+GTTGCTGCTTTGCCAACCATCAAGTACGTTTTGGAACACCACCCAAGATACGTTGTCTTG
+GCTTCTCACTTGGGTAGACCAAACGGTGAAAGAAACGAAAAATACTCTTTGGCTCCAGTT
+GCTAAGGAATTGCAATCATTGTTGGGTAAGGATGTCACCTTCTTGAACGACTGTGTGCGT
+CCAGAAGTTGAAGCCGCTGTCAAGGCTTCTGCCCCAGGTTCCGTTATTTTGTTGGAAAAC
+TTGCGTTACCACATCGAAGAAGAAGGTTCCAGAAAGGTCGATGGTCAAAAGGTCAAGGCT
+TCCAAGGAAGATGTTCAAAAGTTCAGACACGAATTGAGCTCTTTGGCTGATGTTTACATC
+AACGATGCCTTCGGTACCGCTCACAGAGCTCACTCTTCTATGGTCGGTTTCGACTTGCCA
+CAACGTGCTGCCGGTTTCTTGTTGGAAAAGGAATTGAAGTACTTCGGTAAGGCTTTGGAG
+AACCCAACCAGACCATTCTTGGCCATCTTAGGTGGTGCCAAGGTTGCTGACAAGATTCAA
+TTGATTGACAACTTGTTGGACAAGGTCGACTCTATCATCATTGGTGGTGGTATGGCTTTC
+ACCTTCAAGAAGGTTTTGGAAAACACTGAAATCGGTGACTCCATCTTCGACAAGGCTGGT
+GCTGAAATCGTTCCAAAGTTGATGGAAAAGGCCAAGGCCAAGGGTGTCGAAGTCGTCTTG
+CCAGTCGACTTCATCATTGCTGATGCTTTCTCTGCTGATGCCAACACCAAGACTGTCACT
+GACAAGGAAGGTATTCCAGCTGGCTGGCAAGGGTTGGACAATGGTCCAGAATCTAGAAAG
+TTGTTTGCTGCTACTGTTGCAAAGGCTAAGACCATTGTCTGGAACGGTCCACCAGGTGTT
+TTCGAATTCGAAAAGTTCGCTGCTGGTACTAAGGCTTTGTTAGACGAAGTTGTCAAGAGC
+TCTGCTGCTGGTAACACCGTCATCATTGGTGGTGGTGACACTGCCACTGTCGCTAAGAAG
+TACGGTGTCACTGACAAGATCTCCCATGTCTCTACTGGTGGTGGTGCTTCTTTGGAATTA
+TTGGAAGGTAAGGAATTGCCAGGTGTTGCTTTCTTATCCGAAAAGAAATAA
+>POL4 1749 residues Pha 0 Code 0
+ATGTCTCTAAAGGGTAAATTTTTCGCCTTTTTACCTAATCCTAACACATCTTCCAATAAG
+TTCTTTAAGAGTATATTGGAGAAAAAGGGCGCCACAATTGTGTCAAGTATTCAAAATTGT
+CTTCAATCTAGCCGTAAGGAAGTTATCATTTTGATTGAGGACTCCTTTGTTGATTCTGAT
+ATGCATTTGACTCAGAAAGATATTTTCCAAAGGGAAGCAGGCTTAAATGATGTCGATGAA
+TTTCTTGGTAAGATTGAACAGTCAGGCATTCAATGTGTGAAAACCAGTTGCATCACAAAG
+TGGGTCCAGAATGATAAATTTGCGTTTCAAAAAGATGATTTGATTAAATTTCAACCATCC
+ATTATCGTTATATCAGATAACGCTGATGACGGACAAAGTTCTACTGATAAAGAGAGTGAG
+ATTTCAACTGACGTAGAAAGTGAAAGGAATGATGACAGCAACAATAAGGATATGATACAA
+GCTTCAAAACCTCTTAAGCGACTTTTACAGGAGGATAAAGGAAGAGCTTCCCTTGTTACT
+GACAAAACGAAGTACAAAAACAATGAATTGATTATCGGAGCGTTGAAAAGGTTAACAAAA
+AAATATGAGATCGAAGGTGAGAAATTTCGTGCAAGAAGTTATAGACTGGCTAAACAGTCG
+ATGGAAAATTGCGATTTCAATGTTCGTTCCGGTGAAGAAGCACATACTAAATTAAGGAAT
+ATCGGGCCTAGTATTGCCAAAAAAATACAAGTTATATTAGATACGGGAGTTTTACCAGGT
+TTAAATGATTCAGTGGGATTAGAAGACAAGTTAAAATACTTCAAAAATTGTTACGGCATT
+GGGTCGGAAATTGCTAAACGCTGGAATCTTCTAAATTTTGAAAGCTTTTGTGTTGCAGCT
+AAGAAGGATCCAGAGGAGTTTGTATCAGATTGGACAATTTTATTTGGTTGGTCATATTAC
+GACGATTGGTTATGCAAGATGTCTCGGAATGAATGTTTCACACATTTAAAGAAGGTTCAA
+AAAGCGCTGCGTGGCATTGATCCTGAATGCCAAGTCGAATTACAGGGAAGTTATAATAGG
+GGCTATTCCAAGTGTGGTGACATTGATCTTTTATTTTTCAAGCCGTTTTGTAATGACACG
+ACCGAGTTGGCAAAAATCATGGAAACGCTTTGTATTAAGTTGTACAAGGATGGCTATATC
+CATTGTTTTTTACAGCTAACGCCAAACTTGGAAAAGCTATTCTTAAAAAGAATAGTGGAG
+AGATTTCGTACAGCGAAGATTGTTGGGTATGGAGAAAGAAAGAGGTGGTATTCTTCTGAG
+ATAATCAAGAAATTTTTCATGGGAGTCAAATTCTCTCCAAGAGAATTAGAAGAACTGAAA
+GAAATGAAAAATGATGAAGGCACATTGTTAATTGAAGAAGAAGAAGAAGAAGAAACAAAA
+TTAAACCCGATTGACCAATATATGTCTCTGAATGCCAAGGATGGAAATTATTGCAGAAGA
+TTAGACTTTTTTTGTTGCAAGTGGGATGAGCTTGGAGCAGGAAGAATACACTATACTGGA
+TCTAAAGAGTACAATAGATGGATAAGAATATTGGCAGCGCAAAAAGGCTTCAAGCTTACA
+CAACACGGTTTATTTCGAAATAATATCCTTCTCGAAAGCTTTAACGAACGCAGAATTTTC
+GAGTTATTAAACTTAAAATACGCTGAACCCGAACATAGAAATATCGAATGGGAAAAAAAA
+ACTGCATAA
+>YCQ7 2862 residues Pha 0 Code 0
+ATGCTGATCATCAATGGGAAGATCATCCCTATAGCTCATACTATTTGCGCATTCTCCGCC
+TTCTTTGCAGCTTTGGTCACTGGTTATTCATTACATTTTCATAAAATTGTAACCAATGCA
+CATTATACGTATCCAGATGAGTGGTTTCCTAGTGTATCAGCCACTATCGGGGACCGCTAT
+CCGGAACGTTCTATTTTCCAAATCTTAATAGCTCTAACTGCTTTTCCAAGATTTTTACTG
+CTACTAGGTCACTACTACTTGAACCAATCTAAGGTATGCTTCCTTGTCGGTGTACTCCGG
+ACAGTCTCTTGCGGTGGTTGGGTATACATTACAAGTACAGATGACCACGATATTCATGAT
+ATATTTATGATCACATACATTGTTTTAACGTTACCATGGGATATAATGATTACCCGCTAT
+TCTAGTCCTTTAACTTCGAAGAACAAAGGGTTGACTGCTACAATTTTTTTTGGAACATTG
+TTCCCGATGATTTACTGGTACATTCAGCACTCCGTCCAACAGAGAGCTGGGGCATATTCT
+ATATATGCTTATTTCGAATGGTCTCTGATTCTTTTAGATATTGCATTTGATGCATTTGCT
+TACGCTGATTTCAAAAAGATAGATATTGTTCTCGCTTTTAATGAGAAACCCGGTAATACC
+AGTTTTTTCCAAATTAGAGACTCTAATCCCATAAATTATGGAGAAGAAAAAAGTTCAGAA
+TTGCAGAAAAGTGGTGAAAAGAAGGTTGAAAAGGAAAAACCCGTTGCTAGAAGCGCAACT
+GGTTCATATTTCAGGTTTGACTCTTTTTTTTACTTACTAACAAATATTTTTAACGGTTTT
+CTTTTCTGGTCGAACGTTACGTCCCTTTTATGTAGTATTTGGCATTTCCCGCTATGGTAT
+ATGGGAATCTCAGGTTATGAAGCTGCAATATTGGGTTATTTGGGACCCATTTTCTTATAT
+CTGCCGTTCGTTTCTGAAGCCTTCATGCAATATGGTGTACTTTTAGGAGGTATTATTGCC
+ATTGGTGCCTATATTGTTCAGATGCCAGAATTAAGGTTGATTTCTGTAGCTGTGGGAACT
+TCCATTACCGTTGCAACGTTTGTACAAAATCTAAGATATATCACAAATGCGGAGACTAGT
+TTCTCTTTTGCTCTAACTTGGCTGCTAGGTCTTGTTGCATCTGTGATCTTGAAAATGGGG
+TTCTATACCAACAACCCAACTTGGGTCATTTTAGATGAACGTAATGGTGGGTATAATAAG
+ACAGCTCTCGTGCTTACTGTTTTATTCGGCATGCTGTCGCCTTATGTTAATTCAATTAAT
+TTCGAAGGGAAAAGGAATGCTCAAGCAAAATCTGCTTCGTTGATCGGCAAATTATTTTTG
+GCTGTTGGTTTTGGCTCGTTGTTATTCGGAATTCATCAGTTATTGACGGATTCTTCTACT
+ACTATTTATTGGGCATGGGAAGGTTACAATGAATCACACGGTCCCTTGCCATGGCCTTGG
+GGCGCCTTAACTTGTACGGTCATGTTATTTGCTTCTTTGAGTTCTGTGAAGTTTATGGGC
+AAGCCATTAGTTCCATGTTTGTTGCTTCTCATATCCACTGCTGTACTTTCAGCTAGAAGC
+ATTACACAATGGCCTAAATATATTTTTGGTGGTTTATTGTACGCTATCGCTATGCTTTGG
+TTAGTTCCTTCGTATTTTTCTGCATTAGGCCAAGTTCAAAACATATGGGTTTATGTCCTA
+TCATTCTCCGTTTATATTATCTTTGTCCTTGCCCATGTTTGGGTCGTTGCATACGCATTT
+GTTCCAATGGGCTGGGTACTGAGGGAGAAGATTGAGACGGTTCTTGCCTTTTCTTCCACA
+TTTATCATTATTGGTGCTTTAACATGCAAAAACCTTAACGTTCAACTGGTGACTATGGGC
+AAAAAATTCTTCATTTATGTTTTCTTCTTTGCCGTGGCCCTACTATCACTAACAGCTAGG
+TTCGTGTATGATATTAGACCTACAGGAATTCCTCAGCCTTATCATCCAGATTCTCAGTTG
+ATTACAGCTGGTATTTGGACTATCCACTTTGGTCTCGATAATGATATGTGGGCATCTGAA
+GACAGAATGATCAACCTTATTAAAGATATGGAACTAGATGTGGTAGGTCTACTAGAAACA
+GATACACAAAGAATTACCATGGGGAACAGGGATCTAACTAGCAAACTAGCTCATGATTTG
+AATATGTATGCAGATTTCGGACCAGGTCCAAATAAACATACCTGGGGCTGTGTTCTTCTT
+TCTAAATTCCCTATCGTAAATTCTACGCATCATTTATTGCCCTCTCCAGTTGGGGAACTT
+GCGCCAGCCATTCATGCCACACTTCAAACGTACAATGACACTCTCGTTGACGTCTTTGTA
+TTCCATAGTGGACAAGAAGAGGATGAAGAGGATAGAAGACTGCAAAGTAACTACATGGCT
+AAGCTCATGGGCAATACGACTCGCCCAGCTATTTTATTAAGTTACTTAGTTGTTGATCCA
+GGTGAAGGCAACTACAATACGTACGTTAGTGAAACATCCGGAATGCACGACATTGATCCC
+TCTGACGATGATAGATGGTGTGAGTATATCTTGTATAAGGGCTTGAGAAGAACAGGATAT
+GCTAGAGTTGCAAGAGGAACGATAACCGATACGGAGCTACAAGTTGGTAAGTTCCAAGTT
+TTGAGTGAGCAAGCGTTAGTAGAGCACTCGGATTCTATGTATGAATACGGTCATATGAGT
+GAACCGGAATATGAGGACATGAAATTTCCAGATAAGTTTTTAGGCGAAGGTGAGAGGGGT
+CACTTCTACCATGTTTTTGATGAGCCACGTTATTACTTATAA
+>SRD1 678 residues Pha 0 Code 0
+ATGCGATATAATAATTATGACAACTCTGGAAGTTCCTTCTTAACTAGAGTAGTTAAAAAG
+TCAGATATGGAGAAAACGTTATTATTAAATAGAGAAATTGATGACTGGAAGTCAAACGAT
+AAAAAGAAGGCATATAAGGAACGCGGAAGAGTTTATGCAAGTTGCTCATTTATTGAAGTA
+TCCTTTTCTCAAATAAGGGCTGTTGATGTTGAAAAAAAAATTGAGAATGCCGAACAACTA
+AGAGATCTTACAAGAAATATTGTTAAGAACAAAACCAGCTCTTTGAACGAAATTACACCC
+TCAAAGAATCGTGTTATTAGTGCATGCAATTCCGAGAGACGTACGACTAGCCAAGAAGCA
+AACAATCTTGAAGGCTACCATAGTTGTGCACAAGGAACTAGTCGGTCTGCCAGTATTACG
+AAGAAATACAGCAAAAAGACTACTAGTCGTCCTAAAAGAGAAAAGAGACAAACAATCCTC
+CCAAATGGTGAGATAAAGGAATGCTCTAAATGTAAAGACACTTGGACAATTCAATGGCGT
+AGTGGACCCGACCAAAACAGGGAACTTTGTAGTCCCTGTGGACTCGCCTATGGAAAAAGA
+CTGAAGAAGGAGAATGAAAAAAAAAGGCAAGCGGCAGATAAAAGGATAGATTCGAAACAA
+TCCATAGTATCTATTTAA
+>MAK32 1092 residues Pha 0 Code 0
+ATGATGAATGAAGAGGATTCTACAGAAACGAAAAGCCTAGTCATAACTAATGGCATGTTT
+ATCATAGACGACATCGAGCGTAGTAAATATAATATTCACTATAAGAATGTCCCAGGAGGC
+GGAGGGACTTTTGCCATTTTGGGTGCATGCATAATATCTTCCGGCAATGTCACATCCAAA
+GGTTTGAAGTGGATAGTGGACAGAGGCTCTGACTTTCCAAAGGAAGTTATAAGGGAAATA
+GACTCATGGGGTACTGATGTGAGGTTTCGAGATGACTTTAGCAGATTAACTACCAAAGGG
+TTGAATTATTACGAGGGAAGTGATGATTTGAGAAAGTTCAAGTTTTTGACGCCGAAGAAG
+CAGATTAACGTCGATGACTGGATTTCCACATTTGGGCAGAAGATAATTGATGAAATGCAT
+GCGTTTCATTTGCTATGTTCTGGGTCTAGATGCTTAGACATAATAAACGATCTGCTACGG
+GTGAAAAGTTCAAAGGGCACAAAACCAATCGTGATTTGGGAGCCATTCCCAGATCTTTGC
+GACTTTGATCATCAAAATGACATTAAAAGTGTAATGCAGAGGAACGATGTTACGGTAATA
+TTATCTCCAAATGCCGAAGAATCAAGTCGCTTATTTGGTTTAAGTAGCAAGGAACCGACT
+AGTTTGGAAGAATGTCTAGCATTAGCGCATCGTTTCGATGATTTCATGGATGAAAACAAT
+ATGTGTATTCTACGATGCGGTGCCCTCGGAAGCATATCGGTAAGTGAGAAGTTTAAGAAC
+GGACGAACCTATGACCATTTCCCCGCCTACCATTTCAAAACTCAGTCTAAAGTACTAGAT
+CCTACTGGCGGGGGAAACTCGTTCCTTGGCGGCTTTGCAGTTTCTTATGCCCTAACGAAA
+AGCTTAGATATTGCTAGTATATGTGGGAACATCGCTGCAGGCGCAATAATTGAACAATTC
+GGAATACCGAGGTACGATCCAATTGCTAAAACCTGGAACGGAATCACATTCTTGGATAGA
+CTGAAATTTTACCTTTCACAGTCCGGTCTTCAATATAATATAAACGATCTTTACAAAAGT
+CTAACACGATGA
+>PET18 648 residues Pha 0 Code 0
+ATGAGCTGTACCACTGATAAGTTAATACAAAAGTACGACGCCCTTGTTAGGAAAACCACA
+GAACATAAATTCGCTAAGGAACTATGTGCCGGAACATTGAAGGACCGTAGTTTGTACATC
+TATTTATCACAAGATCTGCAATTTTTTGAAACTAGCTTAAGGTTGATATGTAAGACGACT
+TCTTTAGCACCAACTACTCACGCTTTAATAACCTTAGCCAAAAAGATTGGATTTTTTTCT
+AATGATGAAAACTCATACTTTCATGACTGCTTAGAATTATTGGCACCATCCCTCACCAAG
+GAAGAAAGAGATAATTTTGACAATAAAGCGATCCCCGGCGTTGATGCGTATATTAATTTC
+TTAGATGAGCTGAGAAAGGACGCCTCAATTACATGGCCATCCTTAGTAACCAGCTTATGG
+GTTGCTGAGGAACTCTATTGGAGATGGGCTCGTGATACTCCTAGAGCCCCAGGGTTGCAT
+TGGAAATATCAAAAATGGATTGATTTACATGATGGTGAGCATTTTCAAACTTGGTGTGAA
+TTTCTAAAGGCTGAAGTTGACAAGTTTCCCGTCGAAGAAGTGGAAAGCATATTTGTGAAG
+GTTTCACAGTTCGAGTTCGAATTTTTTGAATCTTGTTACAACGCCTAA
+>MAK31 267 residues Pha 0 Code 0
+ATGGACATCTTGAAACTGTCAGATTTTATTGGAAATACTTTAATAGTTTCCCTTACAGAA
+GATCGTATTTTAGTTGGAAGCTTGGTTGCTGTAGATGCCCAAATGAATTTGCTATTAGAT
+CATGTTGAGGAACGTATGGGCTCCAGTAGTAGAATGATGGGCCTAGTCAGCGTCCCTAGG
+CGTTCCGTTAAGACCATAATGATTGATAAGCCTGTTCTGCAGGAGCTTACTGCGAATAAA
+GTTGAATTGATGGCTAATATTGTTTAG
+>HSP30 999 residues Pha 0 Code 0
+ATGAACGATACGCTATCAAGCTTTTTAAATCGTAACGAGGCTTTAGGGCTTAATCCACCA
+CATGGCCTGGATATGCACATTACCAAGAGAGGTTCGGATTGGTTATGGGCAGTGTTTGCA
+GTCTTTGGCTTTATATTGCTATGCTATGTTGTGATGTTCTTCATTGCGGAGAACAAGGGC
+TCCAGATTGACTAGATATGCCTTAGCTCCTGCATTTTTGATCACTTTCTTTGAATTTTTT
+GCTTTCTTCACTTATGCTTCTGATTTAGGTTGGACTGGTGTTCAAGCTGAATTTAACCAC
+GTCAAGGTTAGCAAGTCTATCACAGGTGAAGTTCCCGGTATTAGACAAATCTTTTACTCG
+AAATATATTGCCTGGTTCTTGTCCTGGCCATGCCTTTTATTTTTAATCGAGTTAGCCGCT
+AGTACTACTGGTGAGAATGACGACATTTCCGCCTTGGATATGGTACATTCGCTGTTAATT
+CAAATCGTGGGTACCTTATTCTGGGTTGTTTCGCTATTAGTTGGTTCATTGATCAAGTCC
+ACCTACAAGTGGGGTTATTACACCATTGGTGCTGTCGCTATGTTGGTTACCCAAGGTGTG
+ATATGCCAACGTCAATTCTTCAATTTGAAAACTAGAGGGTTCAATGCACTTATGCTGTGT
+ACCTGCATGGTAATCGTTTGGTTGTACTTTATCTGTTGGGGTCTAAGTGATGGTGGTAAC
+CGTATTCAACCAGACGGTGAGGCTATCTTTTATGGTGTTTTGGATTTATGTGTATTTGCC
+ATTTATCCATGTTACTTGCTAATTGCAGTCAGCCGTGATGGCAAATTGCCAAGGCTATCT
+TTGACAGGAGGATTCTCTCATCACCATGCTACGGACGATGTGGAAGATGCGGCTCCTGAA
+ACAAAAGAAGCTGTTCCAGAGAGCCCAAGAGCATCTGGAGAGACTGCAATCCACGAACCC
+GAACCTGAAGCAGAGCAAGCTGTCGAAGATACTGCTTAG
+>YCR3 1836 residues Pha 0 Code 0
+ATGGCGCGTCAAAAGCTTACTTTCAAAGAACAAATGGATGGTTTCCCCTGGGTCCAACTT
+GTTGTTGTGTCCTTAGTTAGGTTCAGCGAACCAATTGCGTTTTCGTCACTATTTCCTTAT
+GTTTATTTCATGGTTAGAGATTTTAATATTGCTCCCAATGATGCTCAAGTGTCCAAATAT
+TCAGGTTATTTATCTTCATCATTTGCGTTATGCCAAGTCATATCTGCGTACCACTGGGGT
+AGATTCTCTGAAAAACATGGCAGAAAAATAACATTGACTTGCGGGCTTATAGGAACATCT
+GTATCATTGTTAATACTGGGATTTTCACACAATTTCTATCAGGCTTTGGTGGCAAGAAGT
+TTAATGGGATTGCTAAATGGTAACGTCGGCGTTATTAGAACCATTATTGGTGAAATAGCA
+ACTGAAAGAAAACATCAGGCTTTAGCTTTCAGTACTATGCCTTTATTATTTCAATTTGGT
+GCCGTTGTTGGGCCTATGATCGGTGGGTTTCTTGTATTTAGAGATGGAACAATGAATGAA
+GTGCCACTATGGTTTCCACATTTTGCAAAAAGAATAATTAGGTCATATCCGTACGCCTTG
+CCAAACGTGGTAGTGTGCATGTTTTTGATGTTTGGTTTAACTAATGCAACATTGTTTTTG
+GAAGAAACACATCCTGCTTTTAAAAATAGAAGAGATTACGGTTTAGAGGTCGGTGATTTT
+ATTAAGAAGAATATATTTGGTATACAGCCGAAAAGAAGACCCTGGCAAAAGCGCATTCAG
+GATGATTCGGAAAACATTCACCACCGTAATGAGAATGTGAACAGCAATCGAGGACAAGAT
+AGTGAAGAGGATGAAAATAGTCCCCTAGTGAATACTACCAATGACGATGATACTGAAAGC
+ATACAATCGATTGATCCTATTTTAACAAGAAGACAGTCTGTAGGCCTGATTAGGACATAT
+TCTCTGCATGAACCAACAGACGCTGTGCATGCCAATATAGATACAGCTCCAGACGGTTGT
+AAAGAAAGTAGTATATTTCATCACGTTTTTCATACAAAAGTATTTTACCCTATATCGGTG
+AATTTTATTATGGCTTTACATTTGATTGTATACAACGAATTTTTGCCTGTTTTTTTAGCT
+TATGATTTAGCCGTAGATCCAGAAAATCCAAAGAAGCTGGCTTCAAAATTTCCGTGGAAA
+ATATCTGGCGGTATAGGTTATGAACCAGAACAAACCGGTACTCTTTTGTCGACAACAGGT
+ATCTTTGGTTGTTTTGTGGTTATTTTCATTTTTCCCATAGTTGATCGAAATTTCGATTGT
+TTAACAATTTTCAGAACTTTAGTCAAGCTGTACCCTATTATGTACGTTATGGTTCCTTAC
+GTTGTTTTTCTACAGAATGAACGGATTCCTAGCTGGTATACTGTCGTCTACTTGTACATA
+ATCACAGGGATAAAAACATTTTGTGGCGCTTTAACGTCACCACAAATTATGTTATTAATT
+CATAATTCGAGTCCCTTGAGTTGTAGATCAGTCATCAATGGCGCCACCATTAGTATTTCT
+GCCTCTGCTCGTTTCATAGGTCCCTTAGTATGGGGCTATATTATGTCTTGGTCCCAGCAA
+AATGACGTCGCCTGGGTCAGTTGGTGGTCGTTAAGTCTTTTTTGTATGGTAGCTCTTTAT
+CAAAGTTATAAGATAGCACCAATTGATGATAACGAAAATGAGCTTCATGGACAGGGTAGT
+GAAGATGCCTACAATTCGCAGTCACAGTCTTCTGATTTAAGAATGGCTCATCGATCTAGT
+TTAAGCAGCTTAAGTAACCAACGCTGTACCACATGA
+>SYN 1479 residues Pha 0 Code 0
+ATGTTTCATGCTTTCACCTTCCTTAAAGGTGGTAGATTTTACTCTTCACTAACAGTTAAA
+TCATTGTACGAGCAGGTACACCATACTAGCCATGATCCCATTTCAATTAATGGATGGATC
+AAATCCATAAGACTATTAAAACGTATAGCGTTTTTGGATTTACAAGATGGGACTTCTGTG
+AACCCATTAAGAATAGTTATTCCACTCACAAATACTGATGAAGTACAGTTCCTAAAAATT
+CTGAAAACTGGTCAAACTTTATCTATATCTAATGCTACCTGGCAAAGCACCCCTAATAGA
+AAACAACCTTTTGAATTGCAAATCAAAAATCCTGTCAAGTCAATTAAACTTGTGGGTCCC
+GTTTCAGAAAACTATCCATTACAAAAGAAATATCAAACCTTACGTTATTTAAGGTCCTTA
+CCTACACTAAAATACAGAACCGCTTACTTAAGTGCAATTTTACGGTTAAGATCATTTGTA
+GAATTCCAGTTCATGCTATATTTCCAGAAAAACCACTTCACCAAAGTTTCACCACCAATA
+TTAACTTCAAACGATTGTGAAGGTGCCGGCGAGTTGTTTCAAGTCTCCACCAATACGTCG
+CCAACTGCATCCTCGTACTTTGGGAAGCCGACTTATTTGACTGTGTCCACTCAATTGCAC
+TTGGAAATTTTAGCGTTATCACTGTCAAGGTGTTGGACGTTATCTCCTTGCTTTAGAGCC
+GAAAAGAGTGATACTCCAAGACACCTTTCGGAGTTTTGGATGCTTGAAGTGGAAATGTGC
+TTTGTTAATAGCGTCAACGAGCTAACATCGTTTGTTGAGACTACAATAAAACACATAATT
+AAAGCTTGTATAGATAACCAACAAGAACTCTTGCCGAAGCAATTTATCTCTTCACAAGAA
+AATAATGCATCGTCAGAGCTATCAATAAATCAAGAGACACAACAAATTAAAACACGATGG
+GAAGATTTAATAAATGAAAAATGGCACAATATAACGTATACCAATGCAATAGAAATTCTC
+AAGAAACGCCACAATGAAGTTTCACACTTTAAGTATGAACCTAAATGGGGACAGCCTTTG
+CAAACTGAACATGAAAAATTTTTAGCCGGAGAGTATTTTAAGTCCCCAGTTTTCGTTACC
+GACTATCCACGTCTTTGTAAACCATTCTACATGAAACAAAATTCCACTCCTGACGATACT
+GTTGGATGCTTTGATCTACTGGTTCCTGGAATGGGTGAAATAATTGGTGGGAGTTTAAGG
+GAAGATGACTATGACAAGTTATGTAGAGAAATGAAAGCACGCGGGATGAATAGATCTGGA
+GAATTGGACTGGTATGTTTCTCTGAGAAAAGAAGGAAGTGCACCACACGGAGGCTTTGGT
+CTAGGGTTTGAGAGATTTATCTCATACTTATATGGCAACCATAATATAAAGGATGCCATA
+CCCTTTTATAGAACATCTGCAGAATCCATCGATTTTTGA
+>YCR6 2232 residues Pha 0 Code 0
+ATGGAACTTCAGAATGATTTAGAGTCGCTCGATAACGAGCTGAATGATTTTAGTGAAGAT
+CCATTTCGTGATGATTTCATAACGGATGAAGACGCTGTAAGATCGGGGTGGCGATCTGCG
+TGGACCAGGATGAAATATTGGTTTTATAAGAATAGACTGAAGTGGACAAACAATCCCATA
+GTGATTGGCGACGCGAAAGATAGTAGGGATGGTTCTAACTTTAGAAGGGGTATACCGCTA
+TATGAATTAGACGCGAATGGTCAACCCATTGATACTGAACTTGTTGATGAGAATGAACTT
+TCTTTTGGAACGGGATTTCGTTCCAAAGTGCCTTTTAAAATAATATTTCGCACATTGCTT
+GGCTCGCTGGTGTTTGCCATTTTTTTAATTCTGATGATTAACATAGCAAAACCCCATCAC
+TCCACGAGAGTGCTATCGCACTTTGGCAGTCCTGAATTTGACCCTTACGTGAAGTATTTT
+AACGGTACGCATGAATTTTTCCCCTTAACGATAGTAATTTCACTAGACGGTTTCCATCCT
+TCACTCATATCTAAGAGGAACACACCGTTTTTACATGACTTATATGAATTGAAATATGAT
+GGAGGTATGAATATCACGTCCACACCTTTTATGATACCCAGCTTCCCTACGGAGACCTTT
+CCCAACCATTGGACGTTGGTTACTGGACAATACCCAATACACCACGGTATAGTCTCTAAC
+GTATTTTGGGATCCTGATCTTAATGAAGAATTCCATCCAGGTGTATTGGACCCTCGAATA
+TGGAACAATAATGATACAGAACCAATATGGCAAACTGTTCAGTCTGCATTTGACGGTGAT
+ATACCATTCAAAGCTGCTACCCATATGTGGCCAGGTAGCGATGTGAATTATACCAAGTAT
+AAGACTGAAGAGAAACTACAACCTGAACATAAAAAGCCTATTGCTAGAGAGAGAACTCCA
+TTTTACTTCGACGAATTCAATGCTAAAGAACCACTTTCGCAAAAATTATCCAAGATTATT
+GAATATGTGGATATGAGTACACTGAACGAAAGACCACAGTTAATTCTCGGTTATGTACCG
+AACGTAGATGCCTTTGGACATAAGCATGGATATCCGTCAGAGTCGGAATACTATTATGAA
+GACTTCACTGAAACACTGGGGGAAGTAGATACATTTCTGAAGCAACTAGTGGAATCGCTG
+CAAGAAAGAAATTTAACCAGCTTTACTAATTTGGTCATTGTTAGCGATCATGGTATGAGC
+GATATCGTAGTTCCCTCAAATGTTATTATATGGGAAGACTTACTGGACGAAAAATTGAGG
+AAGGATTATGTATCGCACGCATATCTAGAGGGTCCGATGATGGCTATATCGTTGAAAGAT
+TCCGGAAACATCAATGAGGTTTACCACAATTTAAAGACTTCTATAGATGAAGACAAGTAT
+ACGGTTTACGTTAATGGAAATTTCCCCAAAGAATGGAACTTTAATGATGGAAAAAATCAT
+CACATGGCGTCAATCTGGATTGTGCCCGAGCCTGGGTATGCAGTGATGAAGAAAGAACAA
+TTGAAGAAGGTGGCAAAAGGTGATCATAAGGACAAAAACGAAGACAATGTGTTCACGATT
+GGATCACATGGATACGACAATAACGCGATCGATATGAGATCTGTATTTATTGGTATGGGG
+CCATATTTTCCACAGGGATACATTGAGCCGTTCCAAAATACCGAAATTTACAACCTTTTG
+TGCGATATTTGCGGTGTGGCAGAAAAGGACAGAAATTCCAATGATGGGACTGGGATGCTT
+ATGAACCAACTCCGCGAACCCCAGAGCAGCGAAGAAGTAGAGATTGAAGATGACTTTGAT
+TATTTGGTCAGTAAGTTTGGTGAATTCAGCACTTATAATATAATTTGGGGCGGGTACCCC
+GAAGAGACAGAACAAGACAATGTTGACAATGATAATGATGACAACGACGATGGAAACACT
+GATGAAATAGCCGCTATGCCATCTTCGTCATTAACGATAAAACTAGAAATGACAACTTCA
+ATACCATCAGCAACTGAGACTCTACCGGGCGAAACATCACCATCATCAAGAAGAAGCAGC
+AGCAGCAGCATACAAGCTAGCGCTACTGCTAGCACAGTGGGGGATTGGCTTCAAGACATA
+ATCAACGACGCAAAAGATCTCATTGACGACATAATTGACAGCATCGACGATTTAGTCGAT
+TCTGATACCTAA
+>GNS1 630 residues Pha 0 Code 0
+ATGGAATACGCCACTATGTCTTCTTCGAACTCCACACATAACTTTCAGAGAAAGATTGCT
+CTTATAGGAGCTAGAAATGTCGGCAAAACCACATTAACGGTTCGCTTCGTAGAATCGCGG
+TTCGTTGAATCCTATTATCCCACTATTGAAAATGAATTTACCAGGATAATTCCTTATAAA
+AGTCATGACTGTACTCTGGAAATTCTAGATACTGCAGGCCAAGATGAAGTTTCTCTATTA
+AACATTAAATCGTTGACGGGCGTACGAGGCATAATGCTGTGCTATAGTATAATAAATCGT
+GCTAGCTTTGATCTTATTCCCATTCTCTGGGACAAGCTGGTAGATCAGCTGGGTAAGGAT
+AACCTCCCGGTAATACTTGTGGGTACCAAAGCTGATTTGGGAAGGAGTACAAAAGGTGTA
+AAAAGGTGTGTCACGAAAGCTGAAGGAGAGAAACTAGCTTCGACAATTGGCAGTCAAGAT
+AAGAGGAACCAGGCAGCATTTATAGAATGCAGTGCCGAGTTAGATTATAATGTTGAAGAA
+ACTTTTATGCTCCTTTTGAAACAAATGGAACGTGTCGAAGGAACTCTGGGGCTTGATGCC
+GAAAATAATAATAAATGTTCTATAATGTGA
+>FEN2 1539 residues Pha 0 Code 0
+ATGATGAAGGAATCGAAATCTATCACTCAACATGAGGTTGAGAGAGAATCTGTTTCTTCC
+AAACGTGCCATTAAAAAGAGATTACTTCTGTTTAAAATAGACTTGTTTGTGCTATCATTT
+GTTTGCTTGCAATACTGGATTAATTATGTCGACCGTGTCGGTTTCACCAATGCATATATA
+TCGGGTATGAAGGAAGATCTTAAGATGGTCGGAAACGATTTGACCGTGTCTAACACAGTT
+TTCATGATTGGTTACATTGTAGGTATGGTCCCCAATAATTTAATGTTATTGTGTGTTCCA
+CCTAGGATATGGCTAAGTTTTTGTACGTTTGCCTGGGGTTTATTGACCTTGGGAATGTAC
+AAAGTTACATCGTTCAAACATATTTGCGCAATTAGATTCTTTCAAGCCTTATTTGAGAGT
+TGCACATTTTCAGGAACACATTTTGTTTTGGGTTCGTGGTATAAAGAAGACGAATTGCCC
+ATTAGAAGTGCTATTTTTACAGGTAGCGGTTTGGTGGGATCTATGTTCAGTGGATTTATG
+CAAACAAGTATCTTTACTCATTTGAATGGGCGGAATGGCTTGGCGGGTTGGAGATGGTTA
+TTCATTATTGATTTTTGTATCACATTACCCATTGCAATTTATGGGTTTATTTTCTTCCCC
+GGCCTTCCTGATCAAACAAGTGCTGTTAGCAAATTTTCTATGACGAGATACATTTTTAAT
+GAACAAGAGCTACATTATGCTAGGAGAAGGCTCCCCGCTAGGGACGAAAGCACCCGGTTA
+GACTGGTCGACTATTCCTAGAGTCCTAAAAAGGTGGCACTGGTGGATGTTCTCTCTTGTT
+TGGGTTCTGGGAGGTGAGAATTTGGGTTTCGCATCTAATTCTACATTTGCATTATGGTTA
+CAAAACCAAAAATATACGTTGGCGCAAAGAAATAATTATCCTTCGGGGATATTTGCCGTA
+GGTATAGTTTCTACGCTTTGTTCTGCTGTATATATGAGTAAGATCCCAAGAGCTAGGCAT
+TGGCATGTTTCTGTTTTCATATCATTGGTAATGGTTATTGTTGCGGTACTAATACGTGCA
+GACCCACTAAATCCAAAAGTCGTCTTTTCTGCACAGTATCTTGGAGGCGTAGCATACGCT
+GGACAAGCGGTTTTTTTTTCGTGGGCAAACATTATTTGTCATGCAGATCTTCAAGAACGT
+GCTATCGTTCTTGCTTCAATGAATATGTTTTCAGGGGCCGTTAACGCATGGTGGTCTATA
+TTATTCTTTGCTTCAGATATGGTGCCCAAGTTTGAGAGAGGTTGCTACGCCCTCTTGGCT
+ACGGCAATATCAAGCGGAATTGTCTCGGTCGTCATACGCTCACTACAGATAAAAGAGAAT
+TTGTCTAAGAAACAGGTTCCTTATATAGATGCTAATGACATGCCCGGGGAAGATGACGAT
+GACGACAACCAGGATAATGAAAATGATGGCGACGACGAGAGTATGGAAGTTGAACTTCAT
+AATGAGGAAATGGCCGAAATTTCAAATCCTTTCCGATAA
+>RIM1 444 residues Pha 0 Code 0
+ATGTTTTTACGTACTCAAGCTCGTTTCTTCCATGCTACTACCAAGAAGATGGACTTCTCG
+AAAATGTCCATCGTCGGCCGCATTGGCTCTGAATTCACTGAACATACTTCTGCTAATAAC
+AATCGTTATTTGAAATATAGTATCGCTTCGCAACCAAGAAGAGATGGCCAAACCAATTGG
+TATAATATCACCGTTTTCAATGAACCTCAAATCAATTTTTTGACAGAATATGTTAGAAAA
+GGCGCTTTGGTATATGTTGAAGCAGATGCTGCTAACTATGTCTTCGAGAGAGACGACGGT
+TCTAAGGGTACTACTTTGAGCTTAGTTCAAAAGGACATTAATTTATTGAAGAATGGGAAG
+AAATTAGAAGATGCTGAGGGCCAAGAAAATGCTGAGGGCCAAGAAAATGCTGAGGGCCAA
+GAAAATGCTGCTTCTTCAGAATAA
+>CRY1 414 residues Pha 0 Code 0
+ATGTCTAACGTTGTTCAAGCTCGTGACAATTCCCAAGTTTTTGGTGTTGCTAGAATTTAC
+GCTTCTTTCAACGATACTTTCGTTCATGTTACCGATTTATCTGGTAAGGAAACCATCGCC
+AGAGTTACTGGTGGTATGAAGGTTAAGGCTGACAGAGATGAATCTTCTCCATACGCTGCT
+ATGTTAGCTGCCCAAGATGTTGCCGCTAAGTGTAGGGAAGTCGGTATCACTGCCGTTCAC
+GTTAAGATCAGAGCTACCGGTGGTACTAGAACCAAGACTCCAGGTCCAGGTGGTCAAGCT
+GCTTTGAGAGCTTTGGCCAGATCTGGTTTGAGAATTGGCCGTATCGAAGATGTTACCCCA
+GTTCCATCTGACTCCACCAGAAAGAAGGGTGGTAGAAGAGGTAGAAGATTATGA
+>YCS2 6504 residues Pha 0 Code 0
+ATGAATTCAATTATTAATGCTGCTTCGAAAGTCTTAAGACTCCAAGACGATGTGAAGAAG
+GCTACTATAATATTAGGAGATATACTGATATTACAACCAATTAATCACGAAGTTGAACCA
+GATGTAGAAAACTTGGTACAGCATGAACTAACCAAGATAATACAAGGTTATCCCATACAG
+GATAATATGATTATTAATAGCAAAAAAGGCACAGTTGAAGATGACTTATGCGAACTCAAT
+AACTATACCTGTTTTGCACTTTCGAAAAGCTTTGATTTATGCCATGATAGCAGAAATTTC
+AACATAGCGCAGCCGAAACGATGGATACAATTATTAGAGACATTAACTGACTCAGTTAGT
+TTCGCAGTTATTGTTCAAATTATTCTCACTTTATCTAACATTTCGCTAATAAATAAACAA
+ACCTTGGGGAAGTTAAAAAAACTGAGGATTCGAATTTTCGAAATACTATCAAATAAAAAC
+GATAGTTGGAAATCTACATTACTACAGAAAAACCTTATAGAATGGTACATTTTTATGCTT
+TCCGTGGATTGCACACCTTTAGAATTGCAAAACTTATATCTCCATAAGGAGTTGAAATTC
+TGTAACGATATCTTGAATTCATTAACACTCCAAGTTTCTGATCCTCGCTCACAAAATTAC
+CTGCAATTTGAGAACACGTATAAGCTTTTTCAAATACAAAAGTCATCTAGAATTAACAAC
+TCGTTCCTTTTTTACATAGAATTCAATTCCGTTACCTCAAATAGGATAATGACCATAGAA
+AAACACATTTATTTGGAAATTAAGGAAGGCCAGTTTTGTATTTCAAATGATAACTACATA
+ATCGGTTTATTTGAAAACTTCGAATTCGAAGCGGGCACTTTGTACTTTATTGGAGTTTTA
+ATTGATCACAATAATCGAATAACTCTTTATGTTGATGGAAGTATGATCAATCAGCTCACG
+TTATTTGAAAACTCTATATGCCAATTAAGCACTTGTGAACTGGGATCCATGATTTGTTCA
+ATTAAAGTATATAGATTTTATTTGTGGGATGGATTATTAACAGAATTTGCGATAAATATA
+CTTCAAGCTATCGGCACCAATTACCAATATACATTTAGCAAGAAAAAAGAAGGGCCTGAA
+GTTTTATCGCTCTGCCAAGACTTTTTGATCGCTAAGGCTCATTTAATGGCCAGGCCTGCA
+ACAGAAATATCTTCCACAAAATACATCGATGAGATTGAACTTCTTGAAATGGAAAATATC
+ATTATTGATGTTAACCCAAATGATATTCTTCAAGATTTCACCGAATCGTCTAATTTTACG
+GTAAAATTTGAGGAAAGCACAAACTCGAAAAATATTCCGGAAGTGGGTAAGTGCTATTTC
+TATAGGAGTTCAAACTTGGTTTCAAAATTTGTGTCCATTGATTCTATACGGCTTGCGTTT
+TTAAACATGACAGAATCCGGTAGTATAGACGATCTGTTTCATCATGTATCACATCTGATG
+AATCTTTTACGAAATATTGATATTCTTAATTGGTTTAAAAAAGACTTTGGCTTCCCTTTA
+TTTGCTTATACTTTAAAACAAAAAATAACACAAGATTTATCTCAGCCTCTGAATATCCAA
+TTTTTCAATTTATTCTTAGAATTTTGCGGGTGGGATTTCAACGATATTTCCAAATCCATA
+ATTCTAGATACTGATGCCTACGAAAACATAGTCCTTAACTTGGATTTATGGTATATGAAT
+GAGGATCAAAGTTCTCTGGCGTCAGGCGGATTAGAAATTATCAGATTTCTTTTCTTCCAA
+ATTTCAAGTTTGATGGAAGCCTCTATTTATTCTAAGTTCAATTCCAATAAATTCAATGAT
+ATGAATATCCTAGAAAAACTATGTTTAAGCTATCAGGCTGTCACAAAAAGAGAAAATCAG
+AACAGTAAATTTAATGAGCTATCAAATGATTTAATTTCTGTATTTGTTACTTTATTGAAA
+AGCAATACTGATAAACGACACCTGCAGTGGTTTTTACATCTCTCATATTACTTTATTAAG
+AGAAAAGATGTACGTTCTACAGAAATTATACTTCAAGCGGTAGATCAACTTTTTTCGTTT
+TACTTAGATCAAGGTAGCGACGAAAATGCGAAGATACTTTCAGAGATTATACCACTTAAG
+CTAATGCTGATGATTATGGATCAAATAGTGGAAAATAATGAATCAAACCCTATTACGTGC
+TTGAATATCTTATTTAAGGTAGTTCTGACCAATAAACCGCTTTTCAAACAATTTTACAAA
+AATGATGGTTTGAAACTCATATTGACTATGCTTTGTAAGGTAGGGAAAAGCTATCGAGAG
+GAGATTATTTCTTTGCTTCTCACATATTCTATTGGCAATTATACCACAGCTAACGAAATA
+TTTTCAGGTGCTGAAGACATGATTGGAGGAATTTCAAACGACAAGATAACTGCAAAAGAA
+ATTATTTATTTGGCTGTCAACTTCATTGAGTGGCATGTGATTAATTCTAATGCCAGTGAT
+TCTTCTTCTGTATTGGACCTGAACAACCATATATTAAGATTCGTCGAAGATCTGAAATCG
+CTGAGCGCTGTTCCGATTAATGAATCTGTATTTGATCCTAAAAAAAGTTATGTGATGGTT
+TCATTATTAGATCTCTCGATAGCTTTGAATGAATCGGAGGACATCTCAAAGTTCAAGAGC
+TCTTCAAAAGTGATTTCAGAGCTCATTAAAGGTAATATAATGTGTGCTCTTACGAAATAT
+GCCGCTTATGATTTCGAAGTCTATATGAGCACATTTTTTTGTCACAGTACAGAATACAAA
+CTGGTTTATCCAAAAACTGTAATGAACAATTCCAGTTACTTAGAGCTATCATTTATAGTG
+ACACTCCTACCCGAAATACTTAATGACCTGATAGATAGCAATAACAATTTGAACCTGATG
+ATGTTGAAGCATCCATACACGATGTCAAATCTCCTTTATTTTCTTCGCAAATTTCGACCT
+GATACGTCACAGATAGTTATGCCTAAAGATTTTTATTTCTCAAGTTATACATGTCTCTTG
+CATTGTGTTATTCAGATTGATAAATCATCATTTTACCATTTCAAAAACGTTTCTAAGTCG
+CAACTGTTACAGGAATTCAAAATCTGCATAATGAACTTAATATATTCCAATACTCTAAAG
+CAGATAATCTGGGAGAAAGAAGAATACGAGATGTTTTCTGAGTCACTGATGGCGCATCAG
+GAAGTTTTATTTGCACATGGAGCATGTGATAATGAGACCGTTGGCTTATTGTTAATATTT
+TTTGCCAACAGATTACGTGATTGTGGATACAACAAAGCAGTCTTCAATTGTATGAAAGTG
+ATCATTAAGAACAAGGAAAGGAAACTAAAGGAGGTGGCGTGTTTTTTTGACGCAGCGAAT
+AAAAGTGAAGTACTCGAAGGTTTAAGTAATATCCTCTCATGCAATAACTCTGAAACAATG
+AACCTCATAACTGAACAATACCCATTTTTTTTCAACAATACACAACAGGTACGGTTCATA
+AACATTGTCACCAATATCTTGTTTAAGAACAACAATTTTTCTCCAATAAGCGTTAGACAG
+ATCAAAAACCAAGTTTACGAATGGAAAAATGCAAGATCAGAATACGTCACCCAAAACAAT
+AAAAAGTGCCTTATTTTATTTAGAAAAGACAACACATCCTTAGATTTTAAAATCAAAAAG
+TCCATATCAAGATACACTTACAACCTCAAAACGGATAGAGAAGAAAATGCAGTTTTCTAT
+CGAAATAATTTAAATCTTTTGATTTTTCATCTGAAACATACACTGGAGATACAATCAAAT
+CCAAATTCGTCCTGCAAGTGGTCATTGGACTTTGCAGAAGATTTTGATGGGATGAAACGG
+AGGCTTTTGCCTGCTTGGGAACCAAAATATGAACCACTCATTAACGAGGAAGATGCTAAT
+CAAGATACTATAACAGGTGGTAACAGACAAAGGAGAGAAAGTGGAAGCATTTTATCCTAC
+GAATTTATCGAACATATGGAGACTCTTGAGTCGGAGCCAGTTGGAGATTTGAATGAGAAT
+AGAAAAATTCTTAGACTTTTGAAGGATAACGATTCTATTGCAACTATTTGGAATTGCAGT
+TTGATTATTGGATTAGAAATTAAGGAGGGGATTTTAATTCATGGCAGTAATTACCTTTAC
+TTTGTAAGTGATTACTATTTTAGTTTAGAGGATAAAAAGATTCTAAAATTATCAGAAGTA
+TCGCAAGAATCACGGGATATGACGGTTAGCTTAATTAACGGCCCTGATGTTAAAAGGGTA
+TCAACTTTCCTAAAGCACGAAGTCTTTGTTTGGAAACTTCTCGATATCACTTTCGTTACC
+AAACGACCCTTTCTACTTCGGGATGTCGCCATCGAATTATTGTTCAAAGAGAGAGTTAGC
+GCTTTTTTTAGTTTTTACAACAAAAGAGTGAGAGATGACGTTTTACGGGTACTGAATAAG
+ATCCCGAAGCACCTTCCAGCAGATCCAATTTTTTCAAGCGTTTTACAAGAAATAAACGAC
+CGAGGAAATAGTATAGTGGCAAGAAATGGAATAGGAAAGGCAAGCATTGCTTCCAAATTC
+ACTAGCGTCTTCTCAGCGAACAACAGCCTAATAGATGGATTTGAGATCAGCAAAAAATGG
+GTTAGGGGAGAGATTTCTAATTTTTATTACCTGTTGAGTATCAACATCCTAGCGGGAAGG
+TCATTCAACGATTTGACCCAATATCCAGTGTTTCCGTGGGTTATTGCAGATTACGAAAGT
+AACGTACTCGATTTAGAGAATCCTAAAACTTACCGGGACCTATCGAAACCTATGGGCGCT
+CAAAGTGAGAAAAGGAAATTACAGTTTATAGAGCGTTATGAAGCTTTGGCTTCCCTGGAA
+AATGCTGATTCCGCACCATTTCATTATGGCACGCATTATTCCTCAGCTATGATAGTATCT
+TCATATCTGATAAGGCTGAAGCCCTTTGTCGAATCCTTTTTGTTATTGCAAGGCGGAAGT
+TTTGGCCCTGCAGATCGTTTATTTAGTTCGCTTGAAAGGGCCTGGAGCTCTGCTTCTTCT
+GAAAATACAACGGATGTCAGGGAATTGACACCTGAATTTTTTTTTCTACCTGAATTTTTG
+ATCAACGTTAATAGTTATGACTTTGGTACAGACCAAAGCGGTAAAAAAGTTGACGACGTC
+GTACTTCCACCCTGGGCAAATGGTGACCCAAAGGTTTTCATTCAAAAGAATAGAGAAGCT
+TTAGAAAGTCCTTATGTATCAGCACATTTACATGAATGGATTGATTTGATATTTGGTTAC
+AAACAAAAGGGGGAAATTGCTGTGAAATCTGTTAACGTATTCAACAGATTGAGTTACCCA
+GGCGCTGTAAATCTAGATAATATTGACGATGAAAATGAGCGCAGAGCTATCACAGGCATT
+ATTCACAACTTTGGTCAAACGCCTTTACAAATATTTCAGGAACCTCATCCGGAAAAAATA
+GCCTGCAATGTTCAACAGCTAACAACAGAGGTATGGCGTAAGGTTCCAATGAAGCCAATA
+TTTGAGAAGACAATCTTTAATTTGAATGAAAAGAACAGGTCTGTCGATTATGTTATACAC
+GATCCTAGTTACTTCGATTCATTATACTGGAGGGGCTTCGCTTTCCCAAACTTGTTTTTC
+AGAACGGAAGAATCGTTAGTGTCATTGAGAATTGTGCATAAAAATTGGTTAAAAATTGGA
+CTAGATATTTTTAAAAAGACGCATATGGCTCAGATTACATCGTTTGCGTACTGGAAGTTG
+GGCGAATTCATAACTGGTGATAAAAATGGGCTGATAAAAGTTTGGAAATATCGTAAAGAT
+AAGCATTCGGTTTCAGGTAACCTTGAGAACAAAAAAACAATGTTTGGGCACCTATGCGAG
+CTAAAGGAAATGCGCTGTTATCACGACTACAATACGCTTTTAACCTTAGACATCAGCGGC
+TTAGTATATGTCTGGGACATGATTAATTTCGAACTAGTGAGACAAATAACAAATGATGCG
+CAAAAGGTCGCAATATCTCAACATGCAGGGAGCATTATGGTATTGACTAAGAATAACGCC
+ATTTCGATCTTCAATCTAAATGGACAAATATATACATCAAAGAAATTCGAACCAGCTAAA
+ATTGTAAGCTCAATTGATTTTTTTGACTTCACTAAGTTAGACGCAGGTTACAGAAAGCAT
+ATCTATTGGAAAGAGATGGAAATACTACTAGTGGGCTTTGAAGATGGAACTATAGAAATT
+TACGAGCTCTTTTTGACTTTTCATAATGAATGGGCGATAAAGCTACTGAAACAGCTCTGT
+ACCGAAAGAGGGAAAGCCATAACTAGCATTAAGGGACAGGGGAAGACATACCTGTCCCAG
+AAAAGACGCAAGGATACAGCAGAGCCTCATGAGATAGAAGTGATTGCGGGAACATTAGAT
+GGCAGATTAGCTATTTGGTACTAG
+>YCS3 3681 residues Pha 0 Code 0
+ATGGGGTATCCGCCACCTACACGAAGGCTTGGAGATAAGAAAAGGTACCATTATTCCAAT
+AATCCTAACCGAAGGCATCCTTCCGCTGTTTATTCCAAGAATAGCTTTCCAAAATCAAGC
+AATAATGGATTTGTATCTTCTCCTACTGCCGATAATTCAACAAATCCGTCTGTAACTCCC
+AGTACTGCATCTGTACCTCTTCCTACAGCGGCACCTGGAAGCACGTTTGGTATCGAAGCA
+CCCAGGCCATCTCGATATGATCCGAGCTCAGTCAGTAGGCCTTCGTCATCATCTTATTCG
+TCAACAAGAAAAATTGGAAGCCGTTATAACCCAGATGTGGAAAGATCCTCTTCAACCACT
+AGTTCAACTCCGGAAAGTATGAATACGAGCACCATAACACACACCAATACGGATATCGGA
+AACTCACGCTATTCTCGAAAAACCATGAGCAGATATAATCCTCAATCTACTAGTTCTACA
+AACGTTACCCACTTTCCCTCGGCATTATCAAACGCTCCACCGTTTTATGTTGCCAACGGG
+AGTTCTCGGAGACCTCGATCAATGGATGATTATAGTCCTGATGTAACGAACAAGCTCGAA
+ACAAATAATGTTTCATCTGTTAATAATAACAGCCCTCATTCTTATTACTCTAGGAGCAAC
+AAATGGAGATCCATTGGAACGCCTTCCAGACCACCATTTGATAATCATGTCGGCAATATG
+ACGACCACCAGCAATACTAACTCGATCCATCAAAGGGAACCTTTTTGGAAAGCAAATAGT
+ACTACTATTTTAAAATCAACTCATTCACAGTCATCGCCTTCCCTTCATACTAAAAAATTT
+CACGATGCGAATAAATTGGACAAACCAGAGGCTTCAGTTAAAGTTGAAACACCCAGTAAA
+GATGAGACAAAAACCATATCGTACCATGATAACAATTTTCCACCAAGAAAATCAGTTTCT
+AAACCTAATGCACCTTTAGAACCCGATAATATCAAGGTTGGCGAAGAAGATGCATTGGGG
+AAAAAAGAAGTACATAAAAGTGGGCGTGAGATAGCAAAGGAACATCCTACTCCTGTAAAA
+ATGAAAGAGCATGATGAACTAGAAGCTCGCGCTAAAAAAGTAAATAAAATCAATATTGAT
+GGAAAGCAGGACGAAATTTGGACGACAGCAAAAACAGTGGCCAGTGCAGTCGAAGTTTCC
+AAAGAAAGTCATAAGGAACTAACACGCTCTGTTGAAAGGAAGGAAAGTCCAGAAATTAGA
+GATTATGAAAGAGCATACGATCCGAAAGCCCTGAAAACAGACGCAACAAAGTTGACAGTA
+GACGATGATAATAAAAGTTACGAAGAACCTCTTGAAAAAGTGGAAGGGTGTATTTTCCCA
+TTACCAAAAGCAGAAACGAGATTATGGGAATTGAAAAACCAGAAAAGAAACAAAATAATA
+AGTAAACAAAAGTACTTACTGAAAAAGGCAATTAGGAATTTCTCAGAGTATCCTTTTTAC
+GCACAGAACAAACTTATACATCAGCAGGCTACCGGACTTATCTTGACGAAAATTATATCA
+AAGATAAAAAAGGAGGAACATTTGAAAAAAATAAATTTAAAACATGATTATTTCGATCTC
+CAGAAGAAGTATGAAAAAGAATGCGAAATTTTGACTAAACTGAGTGAAAATTTAAGGAAG
+GAAGAAATCGAAAATAAACGTAAAGAGCACGAATTAATGGAGCAGAAAAGACGTGAAGAA
+GGTATCGAAACAGAAAAAGAAAAAAGCTTACGGCATCCATCCTCGTCTTCCTCATCTCGT
+CGCAGAAATAGGGCTGACTTCGTTGATGATGCGGAAATGGAAAATGTATTGCTACAAATC
+GACCCAAATTATAAACATTATCAGGCTGCTGCAACAATTCCTCCGCTAATTTTAGATCCA
+ATCCGCAAATACTCTTACAAATTCTGTGATGTAAATAACTTGGTTACAGACAAAAAGCTT
+TGGGCGTCTAGAATATTGAAAGACGCCTCTGACAACTTTACTGACCATGAGCACTCTTTA
+TTTTTGGAGGGTTATTTAATTCATCCTAAAAAATTCGGTAAAATTTCTCACTACATGGGC
+GGCTTAAGAAGTCCTGAAGAGTGTGTCCTACATTATTATAGAACAAAGAAAACTGTGAAT
+TATAAACAACTTCTTATCGATAAGAACAAGAAAAGAAAAATGTCAGCCGCTGCGAAGCGC
+CGCAAGAGGAAGGAAAGAAGTAATGACGAGGAAGTCGAAGTTGATGAGAGTAAAGAAGAG
+TCAACGAACACGATAGATAAGGAAGAAAAAAGTGAGAACAATGCCGAGGAAAATGTTCAG
+CCGGTTCTAGTTCAAGGTTCTGAAGTGAAAGGTGATCCATTAGGTACACCGGAAAAAGTT
+GAAAATATGATTGAAAAGAGAGGCGAAGAGTTTGCAGGTGAATTGGAAAATGCTGAGAGG
+GTAAATGACTTAAAAAGGGCGCATGATGAAATTGGAGAAGAGAGCAATAAGTCCAGTGTA
+ATAGAAACCAACAATGAGGTACAAATAATGGCTCCAAAAGGAGGTGTTCGGAATGGTTAT
+TATCCAGAGGAGACCAAAGAACTTGACTTCAGTTTAGAGAATGCGTTACAGAGAAAGAAA
+CACAAATCTGCACCAGAGCATAAAACAAGTTATTGGAGTGTTCGTGAATCTCAACTCTTT
+CCAGAATTGTTGAAGGAGTTTGGCTCTCAATGGTCTCTCATATCAGAAAAACTGGGTACC
+AAATCTACTACAATGGTAAGGAATTACTACCAAAGAAATGCAGCTCGCAATGGATGGAAA
+TTACTGGTTGATGAAACCGACTTAAAGCGAGATGGGACTAGTTCAGAATCTGTACAACAA
+TCTCAAATTTTGATACAACCAGAACGACCAAACATCAATGCCTATAGTAATATTCCTCCT
+CAACAAAGACCGGCTTTGGGTTATTTTGTTGGACAACCAACTCATGGGCATAATACATCT
+ATTTCATCTATCGATGGCTCTATAAGACCATTTGGGCCTGATTTTCATCGTGATACCTTT
+TCTAAAATTAGTGCTCCTTTAACCACTTTACCACCACCAAGACTACCATCTATTCAGTTT
+CCTCGTTCAGAAATGGCAGAACCTACAGTGACAGATTTGCGTAACAGGCCCTTAGACCAT
+ATTGACACGTTGGCTGATGCAGCTTCGTCAGTAACAAATAATCAAAACTTCAGTAATGAA
+AGGAATGCAATTGACATTGGCCGTAAATCGACGACAATCAGCAATCTATTGAATAATTCG
+GATCGAAGCATGAAATCTTCTTTCCAAAGCGCTTCAAGACACGAAGCACAGCTCGAAGAC
+ACTCCCAGCATGAACAATATTGTAGTACAAGAAATAAAACCGAATATTACTACGCCAAGA
+TCGAGTTCTATTTCTGCATTACTAAATCCTGTAAATGGGAATGGGCAATCAAACCCAGAT
+GGAAGGCCGTTGCTGCCATTTCAGCATGCTATTTCTCAAGGCACTCCTACTTTCCCTTTA
+CCGGCCCCTCGCACTAGTCCAATAAGTCGTGCGCCTCCAAAGTTCAATTTTTCGAATGAT
+CCGTTGGCAGCTTTGGCTGCGGTTGCCTCCGCGCCAGATGCAATGAGCAGTTTTTTATCT
+AAAAAGGAAAATAATAATTGA
+>GNS1 1044 residues Pha 0 Code 0
+ATGAATTCACTCGTTACTCAATATGCTGCTCCGTTGTTCGAGCGTTATCCCCAACTTCAT
+GACTATTTACCAACTTTGGAGCGACCATTTTTTAATATTTCGTTGTGGGAACATTTCGAT
+GATGTCGTCACTCGTGTAACTAACGGTAGATTTGTTCCAAGCGAATTCCAATTCATTGCA
+GGTGAATTACCATTAAGCACTTTGCCCCCTGTGCTATACGCCATCACTGCCTATTACGTT
+ATTATTTTTGGTGGCAGGTTTTTGTTAAGTAAGTCGAAACCATTTAAATTAAATGGCCTT
+TTCCAATTGCATAATTTGGTTTTAACTTCACTTTCATTGACGCTTTTATTGCTTATGGTT
+GAACAATTAGTGCCAATTATTGTTCAGCACGGGTTATACTTCGCTATCTGTAATATTGGT
+GCTTGGACTCAACCGCTCGTTACATTATATTACATGAATTACATTGTCAAGTTTATTGAA
+TTTATAGACACCTTTTTCTTGGTGCTAAAACATAAAAAATTGACATTTTTGCATACTTAT
+CACCATGGCGCTACTGCCTTATTATGTTACACCCAATTGATGGGCACCACATCTATTTCT
+TGGGTCCCTATTTCATTGAACCTTGGTGTTCACGTGGTTATGTATTGGTACTATTTCTTG
+GCTGCCAGAGGCATCAGGGTCTGGTGGAAGGAATGGGTTACCAGATTTCAAATTATCCAA
+TTTGTTTTGGATATCGGTTTCATATATTTTGCTGTCTACCAAAAAGCAGTTCACTTGTAT
+TTCCCAATTTTGCCACATTGTGGTGACTGTGTGGGTTCAACAACTGCCACCTTTGCAGGT
+TGTGCCATTATTTCTTCATATTTGGTACTATTTATTTCATTTTACATTAACGTTTATAAA
+CGTAAAGGCACCAAAACCAGTAGAGTGGTAAAGCGTGCCCACGGCGGTGTTGCCGCAAAG
+GTTAATGAGTATGTTAACGTTGACTTGAAAAACGTTCCTACTCCATCTCCATCACCAAAA
+CCTCAACACAGAAGAAAAAGGTAA
+>RBK1 1002 residues Pha 0 Code 0
+ATGGGTATTACAGTAATAGGTTCTCTAAACTATGATTTGGACACATTTACGGATAGATTA
+CCTAACGCTGGAGAAACTTTCAGGGCTAACCACTTCGAAACACATGCTGGTGGTAAGGGA
+TTGAACCAAGCTGCGGCCATTGGTAAATTAAAAAACCCCAGCAGCAGATATAGTGTTCGA
+ATGATTGGTAATGTTGGAAATGATACATTTGGTAAACAATTGAAGGACACTTTATCCGAT
+TGCGGAGTCGATATCACTCACGTCGGTACTTACGAAGGCATTAATACGGGTACCGCTACC
+ATATTAATTGAAGAGAAAGCTGGTGGCCAAAATAGGATATTGATTGTAGAAGGTGCTAAC
+AGCAAGACTATTTATGACCCGAAACAGTTGTGTGAAATTTTTCCAGAGGGCAAGGAGGAA
+GAAGAGTATGTTGTTTTTCAACACGAAATTCCTGATCCTCTTTCCATTATTAAATGGATA
+CATGCGAACAGGCCGAATTTTCAGATCGTATATAACCCCTCACCTTTCAAGACCATGCCT
+AAGAAAGATTGGGAGTTGGTAGACCTTTTGGTCGTTAATGAAATTGAGGGTCTTCAAATC
+GTGGAAAGTGTATTTGATAATGAACTTGTTGAAGAAATAAGGGAGAAGATAAAGGACGAC
+TTTTTAGGAGAATATCGTAAAATTTGTGAGCTTTTGTATGAAAAACTCATGAATCGAAAG
+AAAAGAGGAATTGTGGTTATGACTTTGGGTTCGAGAGGGGTGCTTTTCTGTTCGCACGAA
+AGCCCTGAAGTACAATTCCTTCCGGCTATTCAAAATGTTTCGGTTGTTGATACTACAGGA
+GCTGGAGATACTTTCCTGGGCGGTTTGGTTACTCAATTGTATCAAGGAGAGACCTTGTCT
+ATGGCTATAAAGTTCTCTACATTAGCTAGTTCATTGACCATTCAAAGAAAAGGTGCTGCT
+GAAAGCATGCCACTGTATAAAGATGTTCAGAAAGATGCATAA
+>PHO87 2772 residues Pha 0 Code 0
+ATGAGATTCTCACACTTTCTCAAATACAACGCTGTCCCTGAATGGCAGAATCATTACCTA
+GATTATAACGAATTGAAAAATTTGATCTACACATTACAGACAGATGAATTGAAACAAGAA
+ACGCCAACCGGTGACTTAAACGATGACGCTGACTCTCAGACTCCAGGTCCAATCGCTGAT
+ATAGAAAGCAACATAGCTGCAGGAGAACCATCTCCATCGAAAAGAAGATTTACACATAAA
+CTCAAGCGTAAGCTCTTTGGTTCTAAAACACCTTCAGGAAGCAAAAGGGGAGACTCCGAC
+GAAAAGGCCATAGATGGGAACAATATTAACGAGGAAACAATTGAGTTAGACGAGTTATCT
+CCTCAAGGGAAAACCACCTCTTTCAATAAGAATTTTATACGTAAGAAATTCTTTGAATCA
+CGCAGCTCATCTGTGAGTAGCGAGGGAAAGACGCTCTTCAGTTCTTATGATACATTCGTA
+ACTAACCTGAGCGACGAGAAATTGAAAGTAGATGATTTCTACAAAAGAATGGAAGCTAAG
+TTCTATGAAAGATTTGACCACTTGATTAATGATTTGGAGAAGGAAGGCATTGTAACAAGA
+TTGAATGAAACTTTCAATCCTGAAATTCAAGCATTGCCTCCTTTAAGAGAAATTATTTCT
+GGTACATCAGAGACACATTCATCTAATAACCCATTTGAAATACACTCTTCAAACATCGAC
+AGTGAATTGAGAAATAGGTTTGATTACAGCGAAGAAGAAATGGATGAAGATGATGACGTT
+GACGTGTTTGCTGACACTACCGACAATACCGCCCTCTTGAATTATTCGCAATTTAACATT
+AAATCTCAGAAAAAATCATTATTAAAACAGACAATAATAAATCTTTACATAGACCTTTGC
+CAGTTGAAATCTTTTATCGAATTGAACAGAATGGGTTTCAGTAAAATTACTAAGAAGTCT
+GATAAAGTATTGCACATGAACACTAGGCAAGAATTAATAGAAAGTGAAGAATTTTTCAAA
+GACACCTACATCTTCCAGCATGAAACTTTAAGCAGTTTAAACAGTAAAATTGCACAACTT
+ATTGAATTTTATGCTGTTCTCATGGGTCAGCCTGGGAACGTAGATTCATGCAAGCAAGAG
+TTAAAGTCGTACCTGCACGACCACATTGTTTGGGAAAGAAGCAACACATGGAAAGACATG
+TTGGGCCTCTCTTCGCAAAATAACGATATAATAACTATTGAAGATGAAGCTGAGAAACTT
+ATGCAAGAAAAGCTTCAAATTGAATATTTCAAGTATCCATTGCCTAAGCCAATTAATTTG
+AAGTTTACTAAAATTGAAAATTTGGCAGTTCCTAAGCTATTTTTTGGGAAAAGAGCAATG
+AAAATAGGCTTCATTATCATTGTCACAGGTGTTTTGTTGGGTGTTAAAACTTTCAATGAC
+CCTGTCGAACACCGGTGTATGGCATTGGTAGAATGCTGTGCTTTCTTATGGGCTAGTGAA
+GCCATTCCATTACACATCACAGGTTTATTGGTTCCCCTTCTAACTGTCCTTTTTAGGGTA
+CTAAAAGACGATGACGGTAAGGTAATGGGAGCAGCAGCTGCCTCTACAGAAATCTTAGGT
+ACAATGTGGTCGTCAACAATTATGATTTTATTAGCAGGTTTCACATTGGGTGAAGCCTTG
+TCGCAATATAACGTTGCGAAAGTTTTGGCATCGTGGTTATTGGCCCTTGCAGGTACCAAG
+CCAAGAAATGTCCTTTTAATGGCAATGAGTGTTGTATTCTTTCTTTCGATGTGGATTTCC
+AACGTTGCCTCCCCAGTATTGACATATTCTCTATTAACACCCTTACTAGATCCGCTGGAC
+TACACTTCACCGTTTGCTAAGGCATTAGTCATGGGTGTTGCACTTTCGGCAGATATTGGT
+GGTATGGCTTCACCTATTTCTTCGCCACAGAATATCATCTCCATGCAGTACTTAAAACCT
+TATGGAATCGGCTGGGGGCAATTTTTTGCTGTCGCTCTGCCTACAGGTATTCTATCGATG
+CTGTGCTCCTGGGCCTTGATGATACTCACCTTTAAAATAGGCAAAACTAAACTGGAAAAA
+TTTAAACCAATAAGGACCAGATTTACTATAAAGCAATATTTTATCATCATTGTAACTATT
+GCTACTATTCTTCTATGGTGTGTAGAGTCACAAATAGAAAGTGCTTTTGGATCGTCCGGT
+GAAATTGCAGTAATACCGATAGTCCTGTTTTTTGGTACAGGTCTACTATCAACAAAGGAT
+TTCAACACATTCCCTTGGTCAATTGTTGTTCTTGCTATGGGTGGTATAGCCCTTGGTAAG
+GCAGTTTCATCTTCAGGCTTGTTGGTAACTATTGCAAGAGCATTACAAAAGAAAATTCAG
+AACGATGGTGTTTTTGCTATCTTATGTATTTTCGGTATTTTAATGTTAGTTGTGGGCACT
+TTTGTCTCACATACTGTGTCAGCAATCATCATTATTCCCTTGGTGCAAGAAGTTGGTGAC
+AAATTATCCGATCCAAAGGCAGCTCCAATTCTTGTGTTCGGTTGCGCCTTGTTAGCCTCA
+TGCGGTATGGGGTTGGCTTCATCTGGATTTCCAAACGTTACTGCTATTTCTATGACCGAT
+AAAAAGGGTAATAGATGGCTAACTGTAGGCGCTTTTATCTCCAGAGGTGTTCCTGCTTCG
+TTGTTAGCGTTTGTCTGCGTAATTACTCTCGGTTATGGTATTAGTTCTTCCGTCTTAAAA
+GGTAGCACTTAA
+>BUD5 1617 residues Pha 0 Code 0
+ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC
+CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT
+AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTATTAGTTGTTAACTCCGCCTGGAGG
+TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT
+CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA
+ATACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG
+GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA
+AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC
+AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA
+AATCACTCCGTCCCTCGTCTAATACAAATCTGGGTAGAGTCTTATTGGCAAGATAGTGAG
+ACAACATTAAAAGATATCCTCAATTTTTGGTACAGTCACTTGGCTGAATATTATGAATAC
+CAAGAACTGTTTGCAGACATAGTTCAGCTGTTTATAAACAAAAAAAGAACGAGGCAATTG
+AAGATTCATTACATTGGTCTAACTGATAAGGAAATCGAAGAAAATAAACCGCCCCTGGAC
+TACGAAAACTTATTTCTCCAATACGAGATAGACAAAACGAACGCAAATGATGAATTGTGC
+GGTGCAACTGACCTCAGTGATTTACTTTTCCAATGGAAACAGGGTGAACCTCTAGAAGTC
+GAAGCCTTCGCTCTAAACGTATCTCCATGGTCACTTGCAAAGACATTGACTCTCTTAGAA
+TCTTCTCTTTACTTGGATATTGAAACAATAGAATTCACAAGACATTTCAAACACAACGAT
+ACAACAATTGACTCCGTGTTTACGCTTTCCAACCAGTTATCGTCCTACGTTCTTGAGACA
+ACTTTGCAGCAAACGCACACCATTTCCTACTGGTTACAAGTTGCACTTGCTTGTCTATAC
+TTACGAAACTTAAACTCACTTGCTTCAATCATTACATCATTGCAAAATCATTCAATAGAA
+AGACTATCTCTCCCGATAGATGTTAAATCAGACCACCTTTTTCAGCGCCTAAAAGTCGTC
+GTACATCCAAACAACAACTACAACGTTTATAGAAGAACAATTAAACATATTTTCCACAGT
+CAGCTTCCTTGTGTACCTTTTACATCACTGCTTATCAGGGACATTACCTTCATAAGAGAC
+GGAAACGATACATTCACTAAAGATGGTAATAACGTGAATATGCAAAAGTTCAACCAAATC
+ACAAAGATAGTCGCTTTTGCGCAATATTTACAACAAAAGCAATATGAAGATATACACTGT
+TCAAATACTACTGCAAGAAGCTTATTAGGGGCTATGATAAAGGTGCACACTTTATATAAC
+GACAACAAAGACAGGGCGTATCAAGTCAGTATAGCTAAGGTTCCAAGGCTTACCTAA
+>MATALPHA2 633 residues Pha 0 Code 0
+ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC
+AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA
+GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG
+GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA
+CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT
+AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT
+CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT
+GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG
+GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT
+TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG
+AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>MATALPHA1 528 residues Pha 0 Code 0
+ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC
+ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC
+TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT
+TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG
+TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA
+GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT
+GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT
+AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA
+CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG
+>TSM1 4224 residues Pha 0 Code 0
+ATGATGTCCTTTTCCAAAAACGCCACTCCTAGAGCCATTGTTAGTGAATCTAGCACTTTG
+CATGAGATGAAGTTTAGAAATTTTAGAGTTGCCCATGAAAAAATCTCGTTGGATATAGAT
+CTAGCTACTCACTGCATTACCGGTAGCGCTACTATAATAATCATTCCGTTGATCCAAAAC
+CTAGAATATGTAACTTTTGATTGCAAGGAAATGACTATTAAAGATGTTCTGGTCGAAAAT
+CGTCGATGTGATCAATTTATTCATGACGACCCACTTCAAACAAATTTGAATGGATTGACT
+TCACAAAATGTATTATACAGCGACAATTCCATTGAACAGTCACATTTTTTGAGATCTAAG
+TTTGCTAGCTTGAATGAATACCCAGAAACGGACTCTAAATCCCAGTTAACTATAAAAATA
+CCATCTTCCATCAAAATATCTTTGGAGGACGCCAATGCATTAAGTAATTACACTCCGATT
+ACTCCTTCAATTAAGACTACCCCTGGGTTTCAAGAATCTGTTTTCACTCCAATTACATTA
+CAAATTGAATATGAAATCAGAAACCCAAAGTCGGGTATTAAATTCGATACTGTGTATGCT
+GACAAGCCCTGGTTATGGAACGTTTACACTTCAAATGGTGAGATTTGCAGTTCTGCATCA
+TATTGGGTCCCATGTGTCGATTTGCTTGATGAAAAATCTACATGGGAGTTAGAATTCAGC
+GTACCGAGATTGGTTAAAAATATAGGTACTTCGAAATTAATCGGACAAAATGGAGAAGAG
+AGTGAAAAAGAGAAGGAGGATACGCCTGAGCACGATGAAGAGGAAGAGGGGAAGCCGGCA
+AGAGTTATCAAAGACGAAGATAAGGATTCTAACTTGAAAAATGACGAAGAAGGCAAAAAT
+AGTAAAAGCAAAGATGCACAAGATAATGATGAAGAAGAAGAGGAAGGCGAAAGTGACGAA
+GAGGAAGAGGAAGGGGAAGAGGAAAGGCGGAATATTGAGGAAAGCAACAATCCGAGTTTG
+AGGGATGTGATTGTGTGTTGTTCAGAATATTCAAATATTAAAGAACTTCCGCACCCGATT
+GATTTGACGAAAAAAAAATGCATATTTCAGATAATTAATCCTGTGGCTCCACATCACATT
+GGTTGGGCTATAGGCGCCTTTAATTCATGGTCTTTACCTTTGATATCACCTCCAAGTGTT
+GATGCCGAGGACGAAGTAGAGGAAGACAAGTTGAGAGAGAATGTTGTGGACAATGTTAAC
+GATACTATGGATGACGACATTGGTTCGGATATTATACCCATTCAAATTTTCACACTTCCG
+ACGCAGGAAACAGATGAGTTAACAGTTATAAATTCGACAGTTGTCTGCCAAAAAATTATA
+GATTTCTACTCGAAAGAATTTGGGTCTTATCCTTTCACTTGTTACTCTATGGTGTTTTTA
+CCTACCGCACCTTCTAAGCATATGGATTTTGCAGCATTAGGCATTTGTAATACCAGATTA
+TTGTACCCTCTAGAAGTTATTGATAAAGCATTCAGTACTACGAATGAGTTAGCATGGGCA
+CTTGCTAACCAATGGTCTTGTGTGAATATAACTCCTTTAGATATGAACGACTACTGGTGC
+TGTCTTGGTATTGCTGGTTATATGGTGTTTCAGGTAACCAAAAAATTAATGGGTAATAAC
+ACGTATAAATATCAATTAAAGCGTAATAGTGAGGCGATTGTGGAACAAGACTTCGAGAAA
+CCGCCTATTGGGAGCACTTTTACCGGCAGTTCTAGGCCAATATCTTGGTCTTCTAAAGAT
+TTGTCCTTTATACAATTGAAGGCACCGATGATACTACACATACTTGACAGAAGGATGACT
+AAAACAGAACGATCTTTCGGTATGTCTCGAGTATTACCTAAAATTTTCCTTCAAGCTATG
+TCTGGTGATTTACCGAATAATTCGTTGACTTCATCGCATTTTCAACATGTTTGCGAAAGA
+GTTAATAAAAGTAAATTAGAGAATTTTTTCAACGAATGGGTATATGGGTCTGGGGTACCC
+ATATTACGTGTCACCCAAAGATTTAATAGGAAGAGGATGGTTATAGAACTGGGTATAAGG
+CAAGTTCAAGATGAAGAACTTGGCCACGAAAAAGTGGTAGGGGAGGAAGGATTTTTCAAA
+AGTGCACTAGACCACTTAGAACATCCAGATTTGAACCGAACCGAATGCTTCACGGGCTCG
+ATGACTATAAGGATCCATGAACACGATGGTACTCCGTATGAGCATATTGTGGAAATCAAA
+GATACATTCACAAAAATAGATATTCAGTACAATACAAAGTACAGAAGATTAAGGAAAAGA
+GGTGGTGGTGCAAATGATGAAAATGGTGTTGAAAACAATAATGAGGAGAAGCCTATTGTT
+GTGGATGTGAATTGTCTAGGAAATGTATACATGTCGCCCGAAGAGTGTTCCCGATTCAGT
+TTGACGGAATTTAATCGTACGTCTGAGAGTAATGAATTGCTTAAGCAAAACGAAGCATTT
+GAGTGGATACGCATAGACTCTGATCTGGAATGGATTTGCCAAATGCACATTAATCAGCCG
+GATTACATGTTTTCTTCTCAGTTGAGACAAGATGGGGACATAGAGGCCCAACTAGAAGCC
+ATACGATATTATGAGGACGTCGTTGTTAATGGTGGTGTGAAATCACTTGTTTATTCAAGT
+ATTTTGTTTAGAACGGCGATCGACGAGCGTTACTTTTTTGGCATAAGACTCGCGGCGTGC
+GAAGCGCTTAGTAAATACGTATATGATCCGGATTTTACTGGCGGTGTTAAGCATTTAATT
+CAGATTTTTCAGATTTTGTTTTGCCTAGAAGACTCTAATATTCCAAAGAGTAATAACTTT
+GAGAATCCTAAGTTGTATTTCTTACAGTGTAATATTCCCAAATATTTGGCTAAAGTGAAA
+AATGAAAATGGTAAATGTCCAAAATTGGTGAAGCAATTTTTACTGGATATTCTTGTTTAT
+AATGAGAATGGTGAAAATAAATACAGTGATGATGCGTACGTCCGCAGCTTGATTGAAAAT
+GTTGTTAAAGTTGCTTTAAATGAGTATAAAGATAAAGCATATATGGAAAAAGTTAAGACT
+CAGTTATTGAGGTACGAAAATTTGGTGAATTGGCTTTCATCATACGAGTCTTTGATTAAG
+ACTACTATCATGTATGCTAAGTACAAATTGCATAAAGTGGGTGCTTATGACTTTACGGAA
+TTGACAGGAATGATAATGCATACATTAACATTAGGTATAAATAACGGAGATATTTCCAGG
+GAAAGCTTTCAGAATGAGTTTTTAATGGTTTTGAAAATCATGCTTTTAGAAGGTGGTTTA
+AAAAACAAGGATGCCCTTGTTTTGTTTACTGAAATACTTTGCTTCCATGAGGATTCTTAT
+ATTAGGGATAAAAGTGTTGATGTGCTTTCTGAATGTGTAAATCTAGTTGTTATGGATGGT
+AGTTTGGATACCATAAGTGACGATATTAAGTCCTCCGTCCAATCTGTGCACAATGAAGTT
+AAAAATATAAAAAGTGAGGATGATATTGAGTTGTTTTTAAGTGGTCATTACGTCGATGAT
+ATGAAAATAAAAATAGAAAAGATTGGCCGTCAAAATATTAGTGGGTTAATACAAATATGC
+CGAGATATGTTTAAAGGGTATAGCCCTTTGAAGATATTACTCTGGGATGTTTTGAATTTA
+CCTGTTCTTAGCTTGTACCAGAGGAAGCAAATACATGATCTTGTTAGGGTGATGTACACC
+CTAATCAACAGTTTTGTAGTTAGATTGGAAACACCAAGGGAGAGAAGACTTGTGGCGAAG
+ATGAATAGTAATGAAGAAGGTAAACTTGATATTGTTATAAAGCGTGAAAGTATCCTAAAA
+GTACATATTAAAAAGGAAGTAACCTCTACTGTGGAGGCACCCAAGAAGGCGAATAAGATA
+AAGATAAGTTTGAAAGGTGATAAACCTGTTAGAAAAGTGGAAAAACAAATTGTGAAGCCG
+AAGGTAACTAGCAAACAAAGGAAAGTCAAAAGTCATGTGAACCGCATGGGCAGTTTACCT
+TTACGGTTTGTTAAGATCCAACAACAACCTAGAGTAATGGTGCATTTGTCATCCGTCCCG
+TATAGCCAATTCGTTCAAATTACAAAAGTCACATCAAGATCGTTTATGGTTAAGATAAGA
+ACAAAGAATGATGCTAAGAATTGA
+>YCT5 1476 residues Pha 0 Code 0
+ATGAAGCCACAGTGCATACTCATCTCTTTGCTGGTCAACCTCGCATACGCAGAGGAGTAT
+TTGGTGAGGTTCAAAAATCCCACAGCATTCCAACAATTCACTTCGAATTCCAACAGGTCA
+TGGAGACAGTTCATCGACAACAAAATTGAGAAGAAATTCTCCATCGGATCCTTCCGCGGC
+GTGACCATGAACCTGTCCAAGAACTTAGTGAACAAGCTGAAGAAAAGCCCACTGGTGGCT
+GATATTGTGCCCAACTTCAGGTTCGAAGCTTTTGAAGGCGACAGTGTAAATAGCGCCGAG
+TCGAGTTATACGTTTAACGCTACCGCCAAATACTCGTACGAAGACGTCGAGGAAGAGCAA
+AATATAACGTATCAACCAGACGCACCCCGTCACTTGGCCCGGATTTCCCGCCACTACCAA
+CTCCCATTCGACGTTGGGGACAAGGACCGCTACAAAAGCTGGTTCAATTACTACTATGAA
+CACGACTATCAAGGTCAAGACGTCAACGCCTATATCATGGATACGGGTATCTTCGCGGAC
+CATCCGGAATTCGAAGACAGAGTCATCCAGGGGATTGACTTGACCAAAGAAGGGTTTGGC
+GACCAGAATGGCCACGGAACGCACGTGGCGGGACTCGTAGGTTCCAAAACGTATGGAGCG
+GCAAAGAGGGTCAATCTTGTGGAGGTCAAAGTCTTGGGCAAAGACGGGTCTGGCGAGGCC
+AGTAACGTTCTTAGTGGTCTGGAGTTCATCGTGGAACATTGCACAAAGGTCAGTCGCCCA
+CAGGGTAAAAAATGCGTGGCCAATCTAAGTCTAGGGAGTTTCAGGAGCCCCATAATCAAC
+ATGGCAGTGGAGGGGGCCATTGAAGAAGGTATTGTATTTGTTGCCGCGGCGGGGAACTTC
+AATTTAGACGCCTACTGGGCCTCACCTGCGTCTGCAGAAAACGTTATCACCGTAGGGGCC
+TTTGATGACCACATTGACACGATTGCCAAGTTCAGCAATTGGGGGCCCTGTGTAAACATC
+TTTGCCCCAGGCGTGGAAATTGAGTCGCTATCTCATCTGAACTACAACGACACTTTAATT
+TTGTCAGGTACATCTATGTCGACGCCCATTGTCACCGGAGTTGCAGCGATCCTACTCTCG
+AAGGGAATTGAGCCTGAAATGATAGCACAGGAGATTGAGTATTTGTCCACGCGTAATGTT
+TTCCATAGAAGAACGTTGTTTTTCAAGCCTTCTACGCCAAACCAGATTCTTTACAACGGC
+GTCGATAAACTGGACGATCCATATGACGACGAAACGTTCCCTCGATTGAACATAGAGGCA
+ATTGCTAAGGAACTGGAGGAGTACAATGCCACTTTACAAACTCCTATGTCTGAGAATCTT
+CAATCTGGTTCAAAACTGTGGGGTTGGAATAACGATGTCACACTACCTCTTGGTGAGATT
+CGATTGAAGAGGCGTGATTTTATGAAAAATTTGTAG
+>PETCR46 510 residues Pha 0 Code 0
+ATGTGGAGCAGGAACGTCAGATTGCTTGGATCATGGACAAGGTCCTACATGGTCCCCGCC
+ACCAAGAGAAAAACCATCCCCGTGTACCCACCTGTGCAGCGCATAGCTTCGTCGCAGATT
+ATGAAGCAGGTGGCCCTCTCAGAAATAGAGTCTCTGGATCCCGGGGCCGTTAAGAGGAAG
+CTCATCAGTAAAAAGAACAAGGACCGCTTGAAGGCAGGCGACGTGGTCCGGATTGTGTAC
+GACTCGTCCAAGTGCTCGTACGACACCTTTGTTGGCTACATCCTTTCCATAGACCGCAAA
+CAACTGGTGCAAGACGCCTCGTTGCTGTTGCGGAACCAGATAGCCAAGACGGCCGTCGAG
+ATTAGAGTGCCATTGTTTTCGCCGCTGATCGAGAGAATCGACTTGCTAACCCCCCACGTC
+TCGAGCAGACAAAGAAACAAACACTACTACATCAGAGGTACAAGGTTGGATGTCGGCGAC
+CTCGAGGCAGGTCTAAGAAGAAAGAAATAG
+>YCT7 828 residues Pha 0 Code 0
+ATGTCACGTCCTGAGGAGTTGGCACCACCGGAGATTTTCTATAATGATAGCGAAGCACAC
+AAGTACACGGGTTCGACCAGAGTGCAGCATATCCAGGCGAAGATGACGCTGAGGGCGTTG
+GAGCTTTTGAATCTGCAGCCGTGCAGTTTCATTCTGGATATCGGGTGCGGGTCCGGACTG
+TCTGGGGAGATTTTGACGCAGGAGGGAGACCATGTGTGGTGTGGTTTGGATATATCGCCC
+AGCATGCTTGCGACCGGTCTTAGTAGAGAGCTGGAGGGCGACTTGATGTTGCAGGATATG
+GGCACCGGGATACCGTTCCGGGCGGGCTCGTTTGACGCGGCTATTAGTATCAGTGCGATC
+CAATGGCTGTGCAATGCGGACACTTCATACAACGATCCTAAACAGCGGTTGATGAGGTTT
+TTCAACACATTGTATGCTGCACTGAAGAAGGGAGGGAAATTTGTGGCCCAGTTCTACCCG
+AAAAACGACGACCAGGTGGACGACATACTGCAGTCTGCCAAGGTGGCAGGGTTCAGTGGC
+GGGCTTGTGGTGGACGACCCAGAGTCTAAAAAGAATAAGAAGTACTACCTTGTGTTGAGC
+AGTGGGGCCCCACCGCAGGGGGAGGAGCAGGTGAATTTGGACGGTGTGACCATGGACGAG
+GAGAACGTCAACTTGAAGAAACAACTGCGCCAGCGCTTGAAGGGAGGCAAAGACAAGGAG
+TCTGCCAAGAGTTTCATTCTAAGAAAGAAGGAGCTCATGAAAAGACGTGGGAGGAAAGTT
+GCGAAGGACTCCAAGTTCACCGGGAGGAAAAGAAGACACAGGTTCTAG
+>YCT9 447 residues Pha 0 Code 0
+ATGGCGCTGTCCAGGAGCGTGGGGCGAGGATCAAAACTCACGTCCCCAAAAAACGACACA
+TACTTGCTAGCATCCTTTCGGTGGAACCTCGACCGAGACTTGCTCTTCAGGTGTGAAAGG
+TACTTTTGCATGTGGGCGTCCACAGGGTACTCCTCCTCCTGCTCCTGCTTCCCTGCCACA
+CGTTCCGCCTCAGTCGACTCCACTCCTTCAGTCGACTCCACTGGCTCCACCAGCGACGTG
+GTAGACGACCGTGGCGAAACCTCCATGGACTCCTGTGGCAGGATCACGTTATCGTACGTG
+ACCGAATGCCGTTTGTTGGCTTCTGCGGAATTGAGTCTGCGGATCTTAAGAAACTCTTCG
+TCTTGCAACAAATCCTTAGTCTCCGTCATTCTTGCAATCTGTTTTGGCGCTCTTGCTGCA
+AGCCGTGCTGAACAACCACCTGCGTGA
+>ARE1 1833 residues Pha 0 Code 0
+ATGACGGAGACTAAGGATTTGTTGCAAGACGAAGAGTTTCTTAAGATCCGCAGACTCAAT
+TCCGCAGAAGCCAACAAACGGCATTCGGTCACGTACGATAACGTGATCCTGCCACAGGAG
+TCCATGGAGGTTTCGCCACGGTCGTCTACCACGTCGCTGGTGGAGCCAGTGGAGTCGACT
+GAAGGAGTGGAGTCGACTGAGGCGGAACGTGTGGCAGGGAAGCAGGAGCAGGAGGAGGAG
+TACCCTGTGGACGCCCACATGCAAAAGTACCTTTCACACCTGAAGAGCAAGTCTCGGTCG
+AGGTTCCACCGAAAGGATGCTAGCAAGTATGTGTCGTTTTTTGGGGACGTGAGTTTTGAT
+CCTCGCCCCACGCTCCTGGACAGCGCCATCAACGTGCCCTTCCAGACGACTTTCAAAGGT
+CCGGTGCTGGAGAAACAGCTCAAAAATTTACAGTTGACAAAGACCAAGACCAAGGCCACG
+GTGAAGACTACGGTGAAGACTACGGAGAAAACGGACAAGGCAGATGCCCCCCCAGGAGAA
+AAACTGGAGTCGAACTTTTCAGGGATCTACGTGTTCGCATGGATGTTCTTGGGCTGGATA
+GCCATCAGGTGCTGCACAGATTACTATGCGTCGTACGGCAGTGCATGGAATAAGCTGGAA
+ATCGTGCAGTACATGACAACGGACTTGTTCACGATCGCAATGTTGGACTTGGCAATGTTC
+CTGTGCACTTTCTTCGTGGTTTTCGTGCACTGGCTGGTGAAAAAGCGGATCATCAACTGG
+AAGTGGACTGGGTTCGTTGCAGTGAGCATCTTCGAGTTGGCTTTCATCCCCGTGACGTTC
+CCCATTTACGTCTACTACTTTGATTTCAACTGGGTCACGAGAATCTTCCTGTTCCTGCAC
+TCCGTGGTGTTTGTTATGAAGAGCCACTCGTTTGCCTTTTACAACGGGTATCTTTGGGAC
+ATAAAGCAGGAACTCGAGTACTCTTCCAAACAGTTGCAAAAATACAAGGAATCTTTGTCC
+CCAGAGACCCGCGAGATTCTGCAAAAAAGTTGCGACTTTTGCCTTTTCGAATTGAACTAC
+CAGACCAAGGATAACGACTTCCCCAACAACATCAGTTGCAGCAATTTCTTCATGTTCTGT
+TTGTTCCCCGTCCTCGTGTACCAGATCAACTACCCAAGAACGTCGCGCATCAGATGGAGG
+TATGTGTTGGAGAAGGTGTGCGCCATCATTGGCACCATCTTCCTCATGATGGTCACGGCA
+CAGTTCTTCATGCACCCGGTGGCCATGCGCTGTATCCAGTTCCACAACACGCCCACCTTC
+GGCGGCTGGATCCCCGCCACGCAAGAGTGGTTCCACCTGCTCTTCGACATGATTCCGGGC
+TTCACTGTTCTGTACATGCTCACGTTTTACATGATATGGGACGCTTTATTGAATTGCGTG
+GCGGAGTTGACCAGGTTTGCGGACAGATATTTCTACGGCGACTGGTGGAATTGCGTTTCG
+TTTGAAGAGTTTAGCAGAATCTGGAACGTCCCCGTTCACAAATTTTTACTAAGACACGTG
+TACCACAGCTCCATGGGCGCATTGCATTTGAGCAAGAGCCAAGCTACATTATTTACTTTT
+TTCTTGAGTGCCGTGTTCCACGAAATGGCCATGTTCGCCATTTTCAGAAGGGTTAGAGGA
+TATCTGTTCATGTTCCAACTGTCGCAGTTTGTGTGGACTGCTTTGAGCAACACCAAGTTT
+CTACGGGCAAGACCGCAGTTGTCCAACGTTGTCTTTTCGTTTGGTGTCTGTTCAGGGCCC
+AGTATCATTATGACGTTGTACCTGACCTTATGA
+>RSC6 1452 residues Pha 0 Code 0
+ATGGTAACACAGACCAATCCGGTCCCTGTTACATATCCAACGGATGCTTATATCCCCACG
+TATCTGCCCGATGATAAGGTCTCCAATCTGGCAGATTTGAAAAAATTGATAGAAATGGAT
+TCCAGACTAGATTTGTATCTGACAAGAAGGAGGCTGGATACGTCCATCAATTTACCTACA
+AACACCAAGACCAAGGACCATCCCCCCAATAAAGAGATGCTGAGGATTTACGTCTACAAC
+ACTACGGAAAGCAGCCCTCGCAGCGATTCTGGCACCCCAGCGGACTCAGGCAAGACTACA
+TGGACACTGAGAATAGAAGGTAAGCTTCTGCACGAGTCCGCAAACGGAAAGCACCCATTT
+AGTGAGTTTTTGGAAGGTGTCGCGGTCGACTTTAAAAGACTGAAACCGCTGGGCATGGGC
+AAGAAGAGGAAACGCGATTCGTCATTGAGCCTTCCTTTGAATCTGCAACAACCCGAATAC
+AATGATCAAGATAGCACCATGGGCGATAACGACAACGGCGAGGATGAGGACAGTGCAGAG
+GCAGAATCCAGGGAGGAAATTGTAGACGCACTGGAATGGAACTACGATGAAAACAACGTT
+GTGGAGTTTGATGGTATCGACATCAAGAGGCAAGGCAAGGATAATTTGCGATGCAGTATA
+ACCATCCAGTTGAGGGGTGTCGACGGTGGAAAAGTACAGTACTCGCCCAACTTAGCTACC
+TTGATAGGTATGCAAACGGGCTCCGTTAATGACGCGGTTTATTCGATCTACAAGTACATT
+TTGATCAACAATCTGTTTGTTACGGAACAAACAGAGGCTCAAGATGGTTCCAACGATGCC
+GAAGACAGCAGTAACGAGAATAACAATAAAAACGGTGCTGGTGACGATGATGGCGTCGAG
+GGAAGTACTCCAAAGGATAAGCCCGAATTGGGTGAAGTGAAGCTAGATTCACTCTTACAA
+AAGGTATTGGATACAAACGCCGCGCACCTCCCCTTGATGAATGTTGTGCAAACCGTGAAC
+AAACTGGTATCACCCCTACCGCCCATCATCCTAGATTATACAATTGATCTTTCCAAAGAT
+ACCACCTATGGTGCTACCACCTTGGATGTAGATGTGTCGCACATTCTCCACCAGCCTCAA
+CCCCAGCCAAATTTACAAAAAGAGGAAGAAACAGATGCTGAAGACACAGCAAAACTACGT
+GAAATCACAAAGCTTGCCTTGCAGTTGAACTCTAGTGCTCAAAAATACCAGTTTTTCCAC
+GAACTGTCTTTGCATCCAAGAGAAACGCTGACTCACTACTTATGGTCTTCCAAGCAAAAC
+GAGCTTGTGCTGCAGGGCGACCAATACTTCAATGAAGATGCTGCAAGAACGAGTGACATA
+TACAGTAACAACAACAATGACAGGTCACTAATGGGCAATATCTCACTACTGTACTCCCAA
+GGAAGACTATAA
+>THR4 1545 residues Pha 0 Code 0
+ATGCCTAACGCTTCCCAAGTTTACAGATCTACCAGATCCAGCTCTCCAAAGACAATCTCT
+TTTGAAGAGGCTATCATTCAAGGTCTGGCCACTGACGGTGGTCTTTTCATTCCACCAACT
+ATTCCACAAGTGGACCAAGCCACTCTTTTCAATGATTGGTCAAAGCTCTCCTTCCAAGAC
+TTAGCCTTTGCTATCATGAGACTATACATTGCCCAAGAAGAGATTCCAGATGCTGATCTA
+AAGGACTTGATCAAGAGATCTTATTCTACTTTCCGTTCTGATGAAGTCACCCCCTTGGTG
+CAAAACGTCACTGGTGACAAGGAGAATTTGCACATTTTAGAATTATTCCACGGTCCTACC
+TACGCTTTCAAAGACGTTGCTTTACAATTTGTCGGTAATCTTTTTGAATACTTCTTACAA
+AGAACCAACGCCAATTTACCTGAAGGCGAGAAAAAGCAAATCACTGTGGTCGGTGCTACT
+TCCGGTGACACTGGTTCTGCAGCCATCTACGGTTTAAGAGGCAAAAAGGACGTTTCCGTT
+TTCATCTTATATCCAACCGGTAGAATTTCCCCAATTCAAGAAGAACAAATGACCACCGTT
+CCAGATGAAAACGTCCAGACTTTGTCTGTTACCGGTACTTTCGACAACTGTCAAGATATC
+GTCAAAGCTATTTTCGGTGACAAAGAATTCAACTCTAAACACAACGTCGGTGCTGTTAAC
+TCCATCAACTGGGCAAGAATCTTGGCCCAAATGACCTATTACTTTTATTCATTCTTCCAA
+GCCACCAACGGTAAGGACTCCAAGAAGGTCAAGTTCGTTGTGCCAAGTGGGAACTTCGGT
+GATATATTGGCCGGTTATTTTGCCAAGAAAATGGGTTTGCCTATTGAAAAACTGGCCATC
+GCTACCAATGAAAACGACATTTTGGACAGATTTTTGAAATCTGGTCTATACGAAAGATCA
+GACAAGGTTGCTGCTACTTTATCCCCAGCAATGGATATCTTAATCTCTTCTAACTTTGAA
+AGACTACTATGGTACCTAGCTCGTGAATACCTAGCTAATGGTGATGATTTGAAAGCCGGT
+GAAATCGTCAACAATTGGTTCCAGGAATTGAAGACCAACGGTAAGTTCCAAGTTGACAAA
+TCCATCATTGAAGGCGCATCAAAGGACTTTACATCAGAAAGAGTTTCCAATGAAGAAACA
+TCTGAAACAATCAAGAAGATCTACGAATCATCTGTAAATCCAAAACATTACATCTTAGAT
+CCTCACACAGCTGTCGGTGTTTGCGCCACAGAAAGATTGATTGCAAAAGATAATGACAAG
+TCCATCCAATACATTTCTCTATCTACCGCTCACCCAGCTAAATTTGCCGATGCTGTAAAC
+AATGCATTGTCTGGATTTTCCAATTATTCATTTGAAAAGGATGTTTTGCCTGAGGAATTG
+AAGAAACTATCCACATTAAAGAAGAAATTAAAATTCATCGAAAGAGCTGACGTTGAATTG
+GTCAAAAACGCTATTGAAGAAGAACTTGCTAAAATGAAATTATAA
+>CTR86 1692 residues Pha 0 Code 0
+ATGCCTATGAACAATTTTCTAGATGAATTCAATTTATTTGATTCAATCATTACCATGATG
+AAGAACGACCCATGTTGCGTCGAGGATTATGAGCCAATCGTCGAAAACCTGAACCGTATA
+TTTCAAAGGACGTTTAATGATGAAGAACATAGGAAATCAATGGCTAACTCCCAGCTTTTT
+TGGGAACGATTAAGAGACACCTTGGAAGCAATGCTGTTGCCAGCGTCGTTAAATGAGAAT
+AGCTCAATACCGTATACAAGAACAGTGAGGGGCCTTATCTTAATGATGAGAAACCTTGCC
+GCTGAAAACCAGGAAATACCCCAAAAGCTTTTACTACAAAACCTCGTAATTCGTGGTTTT
+CTGCATGCAACTAGTGAGTATGTCGTTGACACTCCGCTAATCAAACATCTATACATCGCA
+TGTTTAACGTGCCTTTTCAATATACAGCAGAACTACTCTACAGTGGATATGACTACTTTT
+CCAGCTCTTTTACAATTTCTTCAATACCCTTATGGGATCAAATTGGAAGACGGTGAAGAA
+GAAGAGCATTTCTGGCTACCATATTTATTTCTTTTCAAGACGTATCTCAACAATGATGAA
+TTTTCCAACGAATTTTTCAGGGATAATGATACACCCCAGAAAGACTATTATTGTGTTAGG
+GATAGAATATTTTTCGATATAGTGACAGCCAAATTCATCCAGGATCAAGAGAATTCCTTT
+TTAATTGAGAAGGGCAGAAACTATCTGGATGATTCAAAATTGGAAATAACTTCTATTGAC
+CTATCTGTCTTAGAATGTATTAGCAAAAGTCTTACAACTGCTTCTTTTGGTAAATACCTC
+AATGGGTTAGAAGAAAGACAGCCAGGAAAATTCACCACTTTGTTGCAGATATTGCAATTG
+GTTGTAACGAGTAAAGAAGATTGGAATACCTATGAGTTGACTGCAATTATGTCATGGTGC
+TACCCCATTCTGCAACGTCTTGCATGCAAGGATATTCCTGCCTTTTTCAATAAAAGTTGT
+AACGATTATGCTCCTTCAGTTGCCATCCAATTACACTCCACTTTACTTTCTTGCCTGGAC
+ATAATTTCTGACTTGTGCAAATTCAATCATGTTAGAAAATTCTTAATTTCGTATGACTCT
+GTGAAAATATTGGTATCTCTCTTGGATACTTTCCAAAAGAATTTGTTGAGGATTAATTTT
+TTGAAAGGAAACGGTGATACGGTGAATGAAATTAAAATCACAGATCATGAAGGTAACAAA
+ATCGAGGACCGGTTATTAATTTTCAACCGTGTTAATACCAACGAATCCTTTATTAGGGCT
+GATAATTTTCCCCATTGTAAATTAGTAATAATCGAAATATTGGCATCGTTAGTGTATGCA
+CATCCTGAAATCCAAGATCAAATAAGAGAATTAGGTGGTCTTGCATTAATTCTTTCCAAT
+TGTGTCATCGATGATAATGATCCGTTTATCAAGGAAAGATCTATTGTTTGCTTGAAGTTT
+TTGTTAAAGAATAATGCCAAGAATCAGGAATATGTCAAAAAAATGGAAGCTCAAGACGTT
+GTTCAAGACGATGCATTGAGCAAAGCTGGGTTTGAAATATCAGTTGAAAAGGGCGGGAAA
+GTTAGATTAGTATCTAAAGAAGAAGACCCTGGGAACGAGAATTCTGAGATTATTAGCATA
+GATGAAGATTAA
+>PWP2 2772 residues Pha 0 Code 0
+ATGAAATCCGATTTCAAGTTCTCTAACCTTTTAGGTACGGTCTACAGGCAAGGTAACATC
+ACCTTTTCCGATGATGGCAAGCAACTACTCTCACCGGTGGGGAATAGGGTCAGCGTGTTT
+GACTTAATCAACAACAAATCGTTCACGTTTGAATACGAGCATCGCAAAAATATTGCTGCC
+ATTGATCTGAACAAACAAGGCACATTGCTGATTTCTATTGACGAGGACGGTCGCGCCATC
+CTTGTCAATTTCAAAGCCCGTAACGTGCTTCACCATTTCAACTTCAAAGAAAAATGCTCC
+GCTGTGAAGTTCAGCCCTGATGGGAGACTCTTTGCATTAGCCTCAGGCAGGTTTTTACAG
+ATTTGGAAGACTCCAGATGTTAATAAAGACAGACAGTTTGCTCCCTTCGTCCGCCATAGG
+GTGCATGCGGGACACTTTCAAGACATAACGTCTTTGACGTGGTCACAAGATTCCAGATTT
+ATCCTTACGACTTCCAAAGACTTAAGCGCAAAAATATGGTCCGTAGATTCAGAGGAAAAG
+AACCTTGCGGCGACAACATTTAATGGGCACAGAGACTACGTTATGGGTGCGTTCTTCAGT
+CATGATCAGGAAAAAATCTACACTGTAAGCAAAGACGGTGCTGTCTTTGTCTGGGAATTT
+ACCAAGAGGCCATCCGATGACGACGACAATGAAAGTGAAGACGACGACAAGCAAGAAGAA
+GTAGATATTTCGAAATACAGCTGGAGAATCACAAAGAAACATTTTTTTTACGCAAACCAA
+GCCAAAGTAAAGTGTGTCACCTTCCATCCAGCAACAAGGCTTTTAGCTGTCGGATTTACT
+AGTGGGGAATTCCGTCTTTACGATTTGCCTGATTTCACTTTGATTCAACAGCTTTCTATG
+GGGCAAAACCCAGTCAACACCGTTAGCGTCAACCAAACCGGCGAATGGCTGGCGTTTGGT
+TCCAGCAAACTGGGCCAATTACTAGTTTACGAATGGCAATCGGAATCGTATATCTTGAAG
+CAGCAGGGCCATTTCGATTCCACAAATAGTCTTGCATACTCTCCGGATGGTTCACGTGTA
+GTGACAGCATCCGAAGATGGGAAAATCAAAGTTTGGGACATTACATCAGGGTTTTGTTTG
+GCCACTTTTGAAGAACACACCTCTTCAGTTACTGCTGTACAGTTTGCGAAAAGGGGTCAG
+GTCATGTTCTCATCATCGTTAGATGGTACGGTGAGAGCGTGGGACTTAATCAGGTATCGT
+AATTTTAGAACATTCACTGGTACTGAAAGAATCCAATTCAATTGTTTAGCGGTGGATCCA
+TCAGGTGAAGTGGTTTGTGCCGGGTCCCTGGACAATTTTGACATTCATGTTTGGTCCGTG
+CAAACTGGTCAATTATTAGATGCTTTGTCCGGACATGAAGGCCCTGTTTCGTGTCTTTCA
+TTTAGTCAAGAGAACAGTGTCTTAGCTTCTGCATCATGGGATAAAACAATTAGAATCTGG
+TCCATATTTGGTAGAAGCCAACAAGTAGAACCTATAGAAGTTTATTCCGATGTTTTAGCC
+TTATCAATGAGACCAGATGGTAAAGAAGTTGCAGTATCTACCTTAAAGGGTCAAATATCC
+ATTTTCAACATAGAAGATGCCAAGCAGGTGGGCAACATTGACTGTAGAAAGGATATAATA
+TCTGGTAGGTTTAATCAAGATAGGTTCACTGCCAAAAATTCTGAACGATCCAAATTTTTT
+ACTACAATACATTACAGTTTTGATGGTATGGCTATTGTGGCTGGTGGTAATAATAACTCC
+ATTTGTCTATATGATGTTCCAAATGAAGTCTTGTTAAAAAGATTCATTGTGTCCAGAAAC
+ATGGCTTTGAATGGTACTCTCGAATTTTTAAACAGTAAGAAAATGACTGAAGCAGGTTCA
+TTAGATTTGATTGACGATGCAGGCGAAAATTCAGATTTGGAGGATCGTATTGATAATTCT
+TTACCAGGGTCTCAAAGAGGTGGCGACCTGTCCACAAGAAAAATGAGACCAGAGGTTAGA
+GTTACTTCGGTGCAATTCTCCCCAACGGCGAATGCATTTGCCGCTGCTTCAACGGAAGGT
+TTATTGATATATTCCACCAATGACACGATATTATTTGATCCCTTTGATCTGGATGTGGAC
+GTCACCCCCCATTCTACTGTAGAGGCGCTACGAGAAAAGCAGTTTTTAAATGCATTAGTA
+ATGGCGTTCAGGTTAAATGAAGAATATTTGATCAATAAAGTCTATGAAGCCATACCTATT
+AAGGAAATCCCCTTGGTTGCAAGTAATATTCCTGCAATATATTTACCGAGGATTCTGAAG
+TTCATCGGTGATTTTGCCATTGAATCCCAACACATTGAGTTTAACCTAATTTGGATCAAA
+GCTCTATTATCTGCGAGCGGTGGTTACATAAATGAACACAAATATCTCTTCTCGACGGCT
+ATGAGGTCGATACAAAGATTTATTGTTAGAGTGGCTAAGGAAGTAGTCAATACCACTACT
+GATAACAAATACACCTATAGATTTTTGGTATCAACTGATGGGTCCATGGAAGATGGCGCG
+GCTGATGATGACGAGGTTCTATTAAAAGATGACGCAGATGAAGATAACGAAGAGAACGAA
+GAGAACGATGTAGTCATGGAATCTGACGACGAGGAAGGATGGATTGGTTTCAATGGGAAG
+GATAACAAATTACCCTTGTCTAATGAAAATGATTCCAGTGATGAAGAAGAAAATGAGAAA
+GAGCTTCCTTGA
+>YCU9 777 residues Pha 0 Code 0
+ATGGATGACGATCACGAACAGTTGGTCGAAGAACTGGAGGCCGTCGAGGCCATCTATCCG
+GATCTTCTCTCCAAGAAGCAGGAAGACGGAAGCATCATCGTTGTGAAAGTGCCGCAGCAT
+GAATACATGACACTGCAGATCTCCTTCCCGACACACTACCCCTCCGAGGAGGCTCCTAAT
+GTCATCGAAGTTGGTGTCTGCACTTCTTTGGCTAAGCGCGATCTCTACGATACCAAGTAC
+CTTCAGCATTTGTTCCAGGAAGTGATGGACTCTGTTTTCCACCGCGGATCTGTCTGTCTA
+TTTGACTTCCTCACAGAACTCGACGGTGTCTTGTACGTTGAACCAGAGGAGGAGACAGAA
+CCGGTCCAGCAGAGTGACATTCCCACAGACCCCTTCGAGGGCTGGACCGCGTCGGACCCC
+ATTACTGATAGAGGCTCGACTTTCATGGCCTTTGCAGCACATGTTACCTCCGAGGAACAA
+GCGTTTGCCATGCTAGACCTACTGAAGACCGACTCCAAGATGCGTAAGGCAAACCATGTC
+ATGAGTGCATGGCGAATCAAGCAGGATGGCTCTGCGGCAACATATCAAGATTCCGATGAT
+GACGGTGAAACGGCCGCCGGCTCCAGAATGCTGCACCTCATCACCATCATGGATGTGTGG
+AACGTCATCGTTGTGGTGGCCCGTTGGTTCGGCGGTGCCCACATAGGTCCCGACCGGTTT
+AAACACATCAATTCTACGGCAAGAGAAGCTGTTGTCAGGGCCGGCTTCGACTCGTAA
+>YCV1 1752 residues Pha 0 Code 0
+ATGGTGCGTTTTGTTTCAATTTTAAGTTTATTCGGCTGCGCGGCGACGCTTGTCACGGCC
+CATGATGACATGGACATGGACATGGATATGGACATGGATATGGACATGAATATCGATACG
+ACAACGTCTCAATCCATAGATGTCTCATCCACGGCTTCAATCGTCCCCGTGCCACATGAA
+CCAAAACATTTGCATGGCCTTCCTATACTGCAATCGCCCTCGCTTACCCCTGCGGAGAGA
+TTGTACTGGGAAAACTACAACACCACAACCTACTTTACTACACAGGCTGGGAATAGGTCT
+GCCCTTCGCTACCACATTATTACGCTGCTCTTGGTTGCATTTGTGCTCTACCCTGTGTCC
+CTGGCGCTAAGCGCCGCCCGTTCTAGGTGGTACTTACCCCTGCTGTTTGTTAATCTATGC
+ATTTGTATTTCGTCCGTAATGGCATTGTCCGTGTTCAAAAATACTTTCCCGGAAGAAGAC
+TGGTATGCGCATAATATCTATGGCACCACTTCTGTGCTACTTCTCGTTTTTATGCTTGTT
+CACTTCTTCGCTGCGGTGCTTTCTGTCCCCGTCTCATTAGCATCGAAAAAGGAGTACCGT
+CCGGTTGACACCATCCCTCTGAATGATCTTGAATCTACGCCCGTCATGGTGAATAGTGCA
+CGTGGCTCTCCAAGTCCTTCTTCCAACAGAGACACGTTGTTCTCGCTCTCTTCAGACACC
+ACGACCGCCACGGCCACCAATAATAATAAACGGAGACGCGCTGAAGGCGAAGACGAGGGT
+GATAACACCTCCAACCACGACACTTTGCGCGACGAAGACTACGATAATGATGACGACGAA
+ATTGCTTCCATTGAAGCGCCACCTCTGCTTCCTCAAGACATACCCGTTTTCCGAATCTTG
+TTTACCAACACGAAGTACCAGATGCTTGCCGCGCACCTCTCGTGCGTCGCCAACGTGGTC
+TTTCACATGCTTACCTACCCGCTATTCATGTACATCTTTGTAGACCTAATCATCGGCTTC
+GCTGTAGGTAACTTGCTCGGCAAGGGCATCCGCATCTTTAATCTCTTGGCCCACTGGATT
+AAGGGCGGCGTATTTTTTACTCTGGGCGTTGTCTCTTTAGCAAGATACTGCGGTTTCGCA
+GCTAAGTACGGCTGGGCATGGAACAACATCAGCTTCACCTCTCAACTCACACAAACGCGT
+TCCTCCAATCTTCTTTTCCGGTTTGCTCCTGCGGGGACTTTCACCATGGAATTCGTTGAA
+TCCTTCCTCATTTTCTTTTACGGGTCCACCAACATCTTCTTGGAGCACCTGGCAGGAAAC
+GGCGGCGCATGGACTGCCAAGGATTTACAGCATGTGTCGATAAATTCTCACCGGCCCCAA
+GGTGTGTGGGCTACTCACGGAGTACAAGCTCAACCATTGGCGATTCGAGCATGCCCGCAA
+ACGGCCACAGACCGATGTAGTTGCTGCCACACCGGGGTACTCTCCAAACCCGTTCCCCGC
+TTTCACCATATTTTGGACTGGGATTCTGATGTCCCAGCACGCACAGTCCTCGCAATTTTC
+TACTACCATTCACACGCAATGGGGATACTTGTTGTCCTATGGGTCCTTCTTCCGTCTGCT
+AACATTTTTGATTCTGTTTTTGGTGCCCAACACCAACAGTGCCGCATCCAAGCCTTTCAC
+GGAGTTGATCACCTCGTTCTGTCTCCTCTGTGGTGGTCTGGTATTTATGGAGTCCACGGA
+TCAGTCCATTGA
+>G10 474 residues Pha 0 Code 0
+ATGCCGCGCATAAAGACCAGAAGATCCAAGCCTGCACCTGACGGGTTCGAAAAAATCAAG
+CCAACCCTCACAGATTTCGAAATCCAACTCAGAGATGCCCAAAAGGACAAGTCGTCTAAG
+CTCGCAGCAAAGTCCAATGAGCAGCTCTGGGAGATAATGCAACTCCACCACCAGCGCTCT
+AGATACATATATACTCTGTACTACAAGAGAAAGGCCATCTCCAAAGACCTTTACGATTGG
+TTGATAAAGGAAAAGTATGCTGATAAATTGCTAATTGCCAAATGGCGCAAAACCGGGTAT
+GAAAAACTGTGCTGTCTGCGCTGCATTCAAAAGAACGAAACTAACAACGGTAGCACTTGC
+ATCTGCAGGGTGCCTCGTGCACAGTTAGAGGAAGAAGCACGCAAAAAGGGCACACAGGTG
+TCCTTCCATCAGTGCGTCCACTGCGGCTGCCGTGGATGTGCAAGCACAGACTAA
+>HCM1 1599 residues Pha 0 Code 0
+ATGATGAATGAAGACATATCCATCATTGATGGCCATAATAGTTTTTTAACGGAAAAAAGC
+ACCGTGCTATTAACCCAAGCCAAGAGAACACTAGAAGACGAAAAGGAAATGATTACTCCC
+CCGAGCTCAACTGTGAGAAAAACAATGAAGGAAGTAAATAAGAGGCCGTCGCATCCCCTC
+TCACCGGATCACTCGTCCCCAATTGCTCCATCTAAGGCCAAGCGCCAAAGATCGGACACA
+TGCGCTCGGTCCAATGGTAACCTAACCTTGGAAGAAATTCTTCAATCTTTGGAAAGAAGA
+AGAATAAATGGTGAACTCGCCAAGAAACCTCCATATTCGTATGCAACTTTGATTTGCTTG
+GCCATTTTGCAATCTCAGGAGGGAAAGCTAACGCTATCCCAGATATATCATTGGATCCAC
+GTTCACTTCCCTTATTACAAGCAGAAAGATGCTAGTTGGCAAAATTCAATAAGACATAAC
+TTGTCTTTAAATGATGCGTTCATCAAGACTGAAAAGTCCTGCGATGGTAAGGGTCATTTC
+TGGGAGGTCAGACCGGGTGCCGAAACAAAATTTTTCAAAGGTGAAAATCGTGGTTATGAA
+TTTGTAAAGGACTCCTTACAAGACATTGGGAAGTATTTTGAAATAGATTCTACACTTGAT
+GAATTAGAACAAGTTGAGAGTGGAGAAGGCAATGATGATCTTCCTGACGAGGAAGAAAGA
+GAGGAAGCAGGGAAATTCCCTTCCATTGAAATTCAATTGAACTCCTCCCCTATACTGAGA
+GTTTCCCAGTTACATCACATACCGCAATTGAAAACAGACAACAGTGTACTGAACCCTCAC
+GAAAACCTAGAATCGATGCGGAACATGATAGAAAACGATGTCAACAATATAGATTCCTTG
+GAACCTCCTTATGTCATGAAGAAATATCATACTTCTTTAGGCTTACCGTCGCTGGTGAAT
+GCCAAAGATCATTTCCAGGCGGGTGTGAAAAACAATAATATCACCCAGGCAAATAGATTT
+AATACACTCCCTATAACTAGCGCAAAGTCTCCTCAGAATTTCAGAAAATATTTCACCTCA
+TTCAATTCAAATTTTGAAGATTTATCTCCACTTCGAAGTAATGTAGGGGCTGGTTCTCTA
+CTCGACCCACTTCCGTATTCCCCATTGAAGCTGTACGATCAGAAAAATCTTGCGCTCATG
+TCGAAACCACAATCTCAGCAATCATATTCCAATTCTCAACTTCCACCTCCACCTTCCTCT
+CATGGTTCGGACTTACTTAAAACACCCAAGATGAGGCATTCCGATGGCTTAGAGAAAACC
+CCATCGCGGTTGATAAGCACACCTAAGGACGGTAACTCGATTTTGAGGAAATGGCAGACT
+CCTTCACACCTTTTTGAAGATTTGTACTGTTCTCCGCTATTTAGAGCTATAGAGACTCCA
+ATCAGGTATATCACGACGCCGGGGGGCAACTTTGGAAACCCAAATTTCACCAAGAAAGTC
+CTCTGCACCCGATGTCCTCACAAGCGCAACGAATTCCAAATTTGCTTCAAGCGGGCTGTT
+TGGCGTGGATGTTTATTCTGTTTGGAAGCGCGCAACTGA
+>RAD18 1464 residues Pha 0 Code 0
+ATGGACCACCAAATAACCACTGCAAGCGACTTCACGACTACTTCAATACCGAGCCTGTAC
+CAATTGGATACACTTTTGAGATGTCACATTTGTAAAGATTTTCTAAAAGTCCCCGTCTTA
+ACACCTTGTGGCCATACATTTTGTTCCCTTTGTATTAGAACACATTTGAATAACCAACCA
+AATTGTCCTCTCTGCCTTTTCGAGTTCAGAGAGTCCTTGCTGAGAAGTGAGTTCCTGGTC
+AGTGAAATAATTCAAAGTTATACATCCCTACGATCTTCCTTACTAGATGCACTAAGGATA
+CCGAAGCCTACCCCTGTCCCTGAGAATGAGGAAGTACCAGGTCCTGAAAATTCTTCATGG
+ATAGAACTCATATCAGAGTCTGAAAGTGACAGTGTAAATGCCGCTGATGATGACTTGCAA
+ATTGTTGCAACAAGTGAAAGAAAACTTGCCAAAAGATCCATGACTGATATATTACCACTG
+AGTTCCAAACCATCCAAAAGGAATTTTGCAATGTTCAGAAGTGAACGTATCAAGAAAAAA
+TCAAAGCCAAATGAACAAATGGCCCAGTGCCCCATATGTCAACAATTTTATCCTCTTAAA
+GCCCTTGAAAAAACACATTTGGATGAATGCCTAACTTTACAATCACTAGGCAAAAAACCA
+AAAATTTCTACCACTTTCCCTACAGAGTCAAATCCACATAACAAAAGTTCATCCAGATTC
+AAGGTACGAACTCCAGAAGTCGACAAAAGCTCATGTGGTGAGACCTCACATGTGGATAAG
+TATTTAAACTCAATGATGAGTGCAGAACACCAAAGATTGCCGAAGATCAATTTTACGTCT
+ATGACTCAATCCCAAATAAAACAAAAACTGTCATCGTTGGGACTGTCAACTAATGGTACT
+AGGCAAAACATGATTAAAAGATACAATCACTACGAAATGCTTTGGAATTCTAATTTTTGT
+GATTCTCTAGAACCTGTTGATGAAGCTGAACTAAAAAGACAGTTGTTAAGCTGGGATGTT
+TCACACAATAAAACCCCCCAAAATAGTAGCAACAAGGGTGGAATTTCTAAATTAATGATA
+ATGAAGAGTAATGGGAAATCTTCTTCATATAGGAAATTACTTGAAAATTTCAAAAACGAT
+AAATTTAATAGGAAAGGATGGATGGTTATGTTTCGGAAGGATTTTGCTAGGCTTATCAGG
+GAAGCAAAAATGAAAATAAAAACAGGTTCATCGGACAGTTCAGGTTCAGTGGGACATTCT
+AATGATGGAGATGGTGTTGAAAAAGTTCAAAGTGACCAGGGAACCGAGGATCAGCAAATG
+GAGAAGGATCAGGACACTGTTATCAACGAAGATAGAGTTGCTGGTGAAAGAAATTTGCCT
+AACGAAGATTCAACTGATGCTGACTTATCAAGAGAATTAATGGACTTGAATGAATATAGT
+AAAGACCCACCCGGTAACAATTAA
+>CYPR 957 residues Pha 0 Code 0
+ATGTGGTTGAAATCCTTGCTGCTCTGCCTGTACTCCTTAGTACTCTGCCAAGTCCACGCT
+GCACCTTCATCAGGGAAGCAGATTACCTCCAAGGATGTTGATCTTCAGAAAAAATATGAG
+CCCAGTCCCCCCGCCACACATCGTGGAATAATCACTATCGAATACTTTGATCCCGTTTCG
+AAGTCGATGAAAGAGGCGGATCTGACTTTTGAGTTGTACGGTACTGTCGTGCCCAAAACT
+GTGAACAACTTTGCTATGCTGGCCCATGGTGTTAAGGCAGTTATCGAAGGGAAAGATCCC
+AATGATATACATACTTACTCGTACCGTAAGACCAAAATCAACAAGGTTTACCCTAACAAG
+TATATCCAGGGTGGTGTGGTTGCCCCAGATGTGGGTCCTTTCACCGTCTATGGGCCCAAA
+TTTGATGACGAAAACTTTTACTTAAAACATGACAGGCCTGAAAGACTCGCAATGGCCTAT
+TTTGGACCTGATTCTAACACCTCGGAATTCATCATCACCACTAAAGCCGATGGAAATGAG
+GAATTGGATGGCAAAAGTGTCGTGTTTGGTCAAATAACTTCTGGTCTAGATCAACTAATG
+GATGCTATTCAATACACAGAAACAGACGAATATGGAAAGCCTCAGCATGAATTACGGTTC
+CTGTATTTCGTTCTAGAAATCTTAAAAATTAGTAACATCTTAGATTTGCACGCTGCGTAC
+ACAGAAAAAGTCGAGAAGTTTAGAAATGGCGATGTGTCTGTTGGCTCCACTTTGGAAAAC
+ATCTTCCGTAACGATAAAGCCTACACACCTTTAACCACCTCCACTGGAACCACCGCCTAT
+GATTTAAACCACCCAATTTCCAGAGCCTTGATGTGTTTAACTGTTCTTGGCCTTTGTTTC
+ATTGCCTACAAGGGCATGCACGAAAAGCCTCATACGGTTTCATTAAGACACAAGTAA
+>YCW1 366 residues Pha 0 Code 0
+ATGATCAGTTCGTGTGTTACTAGATGTTTTGGTAGGGGTAAATGCCTTCCAGGGCCTGCC
+ACTGCCTCGATATACCAAACGATAAGATGTATATCCACTAATTCAAATAAAGCTGCTGAG
+GCGCCAATATTTCCAAAGCTGGAAGACGTGAAGATGCATGAGCTCATAGGAAACAACAAT
+TTTGGTAAAAAGACCTACTACGTGGAGAGAAGCAGGACCGGAAATCTACCGGTGTATTCC
+GCTTATAAAAATGGAGGTAACAAGATTATCACGGAGATCAGAAAGATTGAAGGAGATGTA
+ATTCAACTAAGAAATGACTTGCAGGAGCAACTGCCTTTCATACCCAAAAAATCATGGCTG
+TGGTGA
+>YCW2 1548 residues Pha 0 Code 0
+ATGTCCACCCTGATTCCTCCACCTTCTAAGAAACAAAAGAAAGAGGCTCAACTTCCCAGA
+GAAGTAGCTATTATTCCGAAAGATTTACCCAATGTTTCAATCAAGTTCCAAGCTTTAGAT
+ACTGGTGACAATGTAGGTGGCGCCCTGAGAGTTCCCGGTGCTATCTCCGAGAAACAGTTA
+GAAGAACTTTTAAATCAATTGAACGGTACTTCAGACGATCCAGTGCCATATACCTTCAGC
+TGTACAATTCAAGGTAAGAAGGCCAGTGACCCTGTGAAGACGATTGATATAACAGATAAC
+CTATATTCTTCATTAATAAAACCAGGCTATAACAGTACAGAAGATCAGATCACGCTACTG
+TATACGCCAAGAGCAGTTTTCAAAGTCAAGCCGGTAACTAGAAGTTCATCAGCCATTGCA
+GGTCACGGTTCCACAATTTTGTGTTCTGCCTTCGCACCACATACGAGTTCTAGGATGGTA
+ACCGGTGCAGGTGATAATACTGCAAGGATTTGGGACTGTGACACCCAAACGCCAATGCAT
+ACTCTAAAGGGTCACTACAATTGGGTTCTCTGCGTTTCCTGGTCCCCCGATGGAGAAGTA
+ATTGCTACGGGATCCATGGACAATACCATAAGATTATGGGACCCAAAAAGCGGTCAGTGT
+CTAGGTGATGCTCTCAGAGGTCATTCCAAGTGGATCACTTCTTTAAGTTGGGAACCTATA
+CATCTTGTGAAGCCGGGCTCCAAACCAAGATTAGCTTCATCTTCTAAGGATGGTACTATT
+AAGATTTGGGACACTGTGAGCAGAGTTTGCCAGTATACGATGAGTGGTCACACAAATTCA
+GTGTCTTGTGTCAAATGGGGCGGCCAAGGTCTATTGTATAGTGGCTCTCACGATAGAACC
+GTACGTGTATGGGACATCAATTCGCAGGGCAGATGTATCAACATTTTGAAGTCGCATGCG
+CACTGGGTTAATCACTTATCTTTATCTACAGATTACGCATTGCGCATTGGTGCATTCGAT
+CATACAGGTAAGAAGCCTTCTACACCAGAAGAAGCCCAGAAAAAGGCATTGGAAAATTAT
+GAAAAAATCTGTAAAAAGAATGGAAATTCAGAAGAAATGATGGTTACTGCAAGCGATGAT
+TATACCATGTTTTTATGGAACCCACTAAAATCTACCAAGCCTATAGCAAGAATGACCGGT
+CACCAAAAATTAGTCAATCATGTGGCGTTCAGCCCTGATGGTAGGTATATTGTCTCAGCG
+TCTTTTGATAACTCTATCAAACTTTGGGACGGTAGAGATGGTAAGTTTATCTCCACATTT
+AGAGGGCATATAGCCAGCGTATACCAGGTTGCGTGGTCATCGGACTGCCGACTACTGGTG
+TCATGTTCCAAAGATACCACGTTGAAAGTGTGGGATGTAAGAACTAGAAAACTTTCTGTT
+GACCTCCCTGGTCATAAAGACGAAGTTTATACCGTCGACTGGAGTGTCGACGGTAAAAGA
+GTGTGTAGTGGTGGGAAAGACAAGATGGTAAGATTGTGGACGCATTGA
+>SSK22 3945 residues Pha 0 Code 0
+ATGATGATGGATATACTGAATACACAGCAACAAAAAGCGGCTGAAGGCGGGAGAGTTCTG
+GCTCCTCATACCATCTCAAGTAAGCTCGTGAAGAGATTATCAAGTCATTCCAGCCATAAA
+CTATCAAGATCTGATTTGAAAGCATTGGGTGGCTCGGAAACAATAAGCGACGGCCCCAGT
+CAGCTGACTTTTAAGGACCGATACGTTTTCAATGAATCGCTATACTTGAAAAAGCTAAAA
+AAGACCGCTTTAGATGACTACTACACGAGGGGCATAAAACTCACTAACCGCTACGAGGAA
+GACGACGGTGATGACGAAATTATTCGGTTGTCTAATGGCGACAGAATTGATGAAGACCTG
+CACTCAGGTGTCAAGTTTTTCTCCACTACACCTTATTGCAGGAAAATGAGGTCAGACAGT
+GATGAACTAGCTTGGAATGAAATTGCGACCGAACGGTTCAAATGGCAGTCAATGCTGGCC
+AGAGTGCTGAAGGGAGATATTGTTAAAGGTGAAAAGACGAGGATTGCTAACCAAGTCAAG
+AAACCAGGGTTAAATAAGGAGCTCTCAGATGAGATATGGCTCGAATTGAAGGCATGGCTG
+AATGGGAGGACCATGCAAGAGATGGAACAGTCGCTTACATATTTAAGAGATAGTTCAGAT
+TCCGTTTTTGAAGAGATAATGAAGTTTCAAATTCCACAGGGCAAGATATTGAGCCTGGAT
+GCACTGGAGGCCATCTTACAAGACCTCATGAACAGATATCACAGCGTTGTCTCTTATTGG
+CCTAACTTGAAAAAAATGTATAAGGATAAACCAATCACCAATACTGCAGAATTTACCGCT
+AGAATAGACGTAATGAATTCTTGGCTGAACTTTAAAACGAACTTAACGTTGAGGAGGCAA
+GAGTTGGACGACTGGATAAACCGTTTCTCACCGATAAGTAGTTCGGATAATTGCCAAGAG
+GATTTTGATGGTGTGCCCCAATGGAACTGCAAAATGAAGATTCTTGCAGAACAATTGATG
+AAGGAAAAGAACATCGAGTCTATATTCCAAAAAAAAATTTTCTATCCGCTATCACCTTGG
+ATGTTCAAACTGAAACTACATTTTATAGTCTACAGAGAAACTTTGACAAAGATGAACATA
+AAATATCCTTATGAAAGGTTAAGATCACTACTGGCGTTCCCCGTCTATTTAATCAAAGAA
+GTTATTTTGACTAGATTGTCATATGCACGAAAGCTTAAAAATCCAACAATGATGATGATC
+GATCAAATGATCGATGATTTTAACGCTTTTATTCGACTTTCTGTGCAATTGAAGTACACA
+CTGACAAAATATTGCTCCAATTTGCCGTTCGATGTGGATTTTGACCCGACGTTCGAAAAT
+ACTGTAATAGAAGCCATTCGTTATTTATTTTTTCTGTTGAATTTAAAGTTGATTGATTCC
+AGTAAACAAAATTTCAAAGCACCCGATCTACTCTTGAAATACTGGGATCACCTAAAAAAC
+ACCGGTCACTATATTAACGGTGCAGAAACCGTGATTCCAAATGAATTTCTCAAGTTAACT
+TTGAGACTCGTACATAAATTGCAATTCTATCTTTTGAAACAACAAAACTTCCCACCAACA
+TTTGCTAACGCTTCAGAAGCAGAAAAATGGCTAAGTTCCATTTTCGAAAATTTGGGTGCC
+ATGAAAAGAAAGCTGAACAGGTTCAGCAATATTCTAGTCAAGGCGTTCCAAAATTCTGCT
+GTTTATCAGATTAATCATAATGCACAACTTGTTAAAAAGTTAAAAGATGCTCACTATTTT
+TTGGTATACTCCGGTAACACTTTTGAGTCTAGTGGTGTATATATGTTTGCTGCTCCTGAA
+TTATTAGGTTGTGACAATGATACCATCTTAAGAATTTTGCGAAATAAATCCATTGGCTGT
+GATTTGGTCCCAAAGCTTGACATTGGAAATAATTTGAATGTGTATGATATAACAACAAAA
+GAAACAGATTTGAACATTCTAGTATCGAAAGGGGAGGATTCCAAAGGAATTCCTTACTAC
+CGAGTAGTAGCAAATTCGTCAAGTGATTTGGACAGGCATGCTCATCAGTCCAAAAAGAAG
+AATTTTTCAACAGACCCTTTTGATCAGCACCTTGATGAAAAGAACAATGAAGTTTTTGAA
+TTGGAAGTTGCTTTGAGCTCATTGGGTGCACTAGTTGTACTATATCCTGGAGAGCCAGTA
+GTTTGGGATGGACCAGTATATAAGCTTCCAGGTAACAACCTTTTTGCATCCAACGAAATG
+GATTTAGGGAAAATTGGTAACCCAAATACGTTGATTTTACTCAATCAAGGTTCTAATTAT
+GCACTGACTTATCAAATCGACAAGTTTAATCAAACGGTAGGTGATTCTGTTTCATTCATA
+GAGAAACGTTGTTCACTCAATTCAATTGAATCCTCCCTACAAAAAATCAATAAGGCATAT
+TACAAACTTACTTATACAGTATTGAACAACTACAAAGGAATTCTAGGTAGCTTTATGAAG
+CAATGTCCGGGAAATGAGTTGTTAAATTCGATATTCATGTTTGGAAGGGATTTTGGAAGA
+AGTTTCCTTAAATATAACGCCTTTAGCTCAAAGAGGAAGTACGTTATCATCTTTCTGATG
+GTTAAATTAGGAATGAACTGGTTGAAATTCCTTGTTGAAGAGTGTGATCCTACCGATCAG
+CGAACTTTCCGATGGTGCGTTCTTGCAATGGATTTTGCGATGCAGATGACTAGTGGTTAT
+AATATCCTGGCGCTGAATGTAAAGCAATTTCAAGAACTGAAGGAGAGGGTATCAGTATGT
+ATGTCATTATTAATTTCACATTTCGACGTTATGGGTGCACGAGCCACTGAAGCTGAAAAT
+GGCATGCAACAGGCAAGATTGAATATTGATACTGAAGAGAATATTGATGAAGAGGCCACC
+CTAGAAATAAACAGCAGGTTGAGACTGGAAGCTATAAAGACGTTGGAAAAGACTATGAAG
+AGGAATCCCAGGCAAATGGGTAAGGTATTGGATGCTACAGATCAGGGAAACAAATACCTA
+CTATCGCTAGCATCCTCATTATCGAATGTATCAATGAGGTGGCAAAAAAGAAGCTTCATT
+GGCGGTGGAACATTTGGACAGGTATACTCTGCAATTAATCTGGAAAACGGTGAAATCTTA
+GCTGTTAAGGAAATAAAGATACACGATACCACAACAATGAAGAAGATTTTTCCCCTGATT
+AAAGAAGAGATGACCGTATTGGAAATGTTAAACCATCCTAATATTGTCCAGTACTATGGT
+GTCGAAGTACATCGCGATAAAGTTAACATCTTCATGGAATACTGTGAGGGTGGTTCTTTA
+GCCTCGTTATTGGATCATGGAAGAATTGAAGATGAAATGGTAACACAAGTGTACACATTC
+GAACTATTAGAAGGTTTGGCATATTTGCACCAATCTGGCGTGGTGCATCGCGACATTAAA
+CCGGAGAATATCTTGCTGGATTTCAATGGAATCATAAAATATGTGGATTTTGGTACGGCA
+CGTACCGTTGTAGGATCTAGGACTAGAACTGTGCGGAACGCAGCCGTTCAAGATTTTGGA
+GTAGAAACAAAGTCCCTCAATGAAATGATGGGGACACCGATGTATATGGCTCCAGAGACT
+ATTTCAGGCTCGGCAGTTAAGGGAAAACTTGGAGCGGACGATGTATGGGCATTAGGATGT
+GTTGTGCTAGAAATGGCCACAGGTAGACGACCTTGGTCTAACTTGGATAATGAATGGGCC
+ATCATGTACCACGTTGCTGCAGGTCGAATACCGCAACTACCCAATAGAGACGAAATGACT
+GCAGCGGGAAGAGCCCTTCTTGGAAAGGTGTTTGGTTCAAGACCCCACTATGAGGGCTAC
+TGCTGTGGAACTACTGATAGACCCTTGGATGATACAAATCCGTGA
+>SOL2 948 residues Pha 0 Code 0
+ATGACTACGACGGTACCCAAGATATTCGCGTTTCACGAGTTTTCAGACGTGGCAGAGGCC
+GTAGCTGACCATGTAGTCCACGCGCAAGACGGTGCATTGGCTCCAAAGAACGAGAGGAAA
+CACTCTGTTCCCAACATCAGCATGAATGCACTGGATATGACGAGAGAGGCCTCTTGCAAA
+AGCACAGCATCTGCCGCGGAAGGGAAAAGTGGTAGCAGTGGTAGTGGCAGTGGTAGCAGT
+AAGCCCAAAAAGGAGAAACGGTTCAAGATTGCTCTCTCCGGTGGGTCATTGATCGAAGTG
+CTACACGAAGGTCTGCTAAAACGAGACGATGTACGGTGGGGAGACTGGGACATTTACTTT
+GCAGACGAGAGACTTGTACCCTTCAGCTCGAATGAAAGCAATTATGGATGCGCCAAAAGG
+AAGATTTTGGACCTGATAGACACGGCGAAGTATGGAACTCCGAAGGTGTACCACATTGAC
+GAGTCATTGATTGACGACCCGCAAGAATGCGTTGATAACTATGAAAAGGTGCTAATCCGC
+GGGTTTGCCGGTAGAGATTCCGTCAAACTTCCGATGTTCGACTTGTTCCTGCTTGGTTGT
+GCCCCCGATGGTCATATCGCATCACTCTTCCCTAACTTCCAGGACAATCTACGTGAGAAA
+CTTGCATGGGTGGTGCCCGTGGAGAACGCTCCTAGTGGGCCCTCGACCAGAATTTCGCTG
+ACTATACCTGTAATCTGCCATTCTCACAGGGTTACTTTCGTTGTCGAAGGTGCAACCAAG
+GCGCCCATCATCAAGACCATTATGGAAAGGCCTGAAAAGGGCCTACCTAGCAGTATTGTC
+AACGAAGGTGCTGCTGGTCGTGTATCATGGTTTGTTGACGACGATGCTCTTACGGACGTC
+CTCGTCACCAAAAAAAAGTATAAATTCCACCAAGGTTTGTCTATTTAA
+>ERS1 783 residues Pha 0 Code 0
+ATGGTGTCGTTAGACGATATACTAGGTATCGTGTATGTTACGTCATGGTCGATATCGATG
+TATCCACCGATAATCACCAATTGGCGCCATAAGTCAGCGAGCGCGATATCGATGGATTTT
+GTCATGTTAAATACGGCAGGTTACTCTTACCTGGTCATATCCATATTTTTGCAATTGTAC
+TGCTGGAAAATGACGGGTGATGAGTCTGACTTGGGCAGGCCCAAGTTGACGCAATTTGAT
+TTCTGGTATTGCCTGCATGGGTGCTTGATGAATGTTGTCTTATTGACCCAGGTGGTAGCT
+GGAGCGAGAATCTGGCGATTTCCAGGTAAAGGTCACCGCAAGATGAATCCATGGTACCTA
+AGGATTTTACTCGCATCACTGGCCATTTTTTCACTGCTAACCGTACAATTTATGTACTCC
+AACTACTGGTACGATTGGCATAACTCAAGAACTCTGGCGTATTGCAACAATTTGTTTTTA
+CTCAAAATATCGATGTCACTAATCAAGTACATCCCACAAGTGACGCATAACTCGACAAGA
+AAATCTATGGATTGTTTCCCCATTCAGGGTGTGTTTCTAGATGTCACTGGCGGTATCGCC
+TCGCTGCTCCAATTGATTTGGCAGTTGTCTAACGATCAAGGTTTCAGTCTGGATACGTTC
+GTGACAAATTTTGGAAAAGTGGGACTGTCAATGGTAACTTTAATATTCAACTTCATCTTT
+ATCATGCAGTGGTTTGTATATCGATCTCGAGGCCATGATCTGGCGTCAGAGTACCCGCTG
+TAG
+>PAT1 2394 residues Pha 0 Code 0
+ATGTCCTTCTTTGGGTTAGAAAATAGCGGTAATGCGCGGGATGGTCCTCTGGACTTTGAA
+GAGAGTTACAAGGGCTATGGCGAGCACGAACTTGAGGAGAACGACTATTTGAACGACGAA
+ACATTTGGTGATAATGTTCAGGTTGGTACCGACTTTGATTTTGGAAATCCTCACAGCAGC
+GGCAGCAGCGGCAACGCAATTGGTGGTAATGGCGTCGGTGCCACGGCTAGATCATATGTT
+GCAGCTACTGCAGAAGGAATTAGCGGCCCTAGGACCGATGGAACGGCAGCAGCAGGACCT
+CTAGACCTGAAGCCAATGGAATCTTTGTGGTCTACTGCACCACCTCCAGCAATGGCGCCT
+TCACCCCAAAGTACAATGGCTCCGGCTCCTGCTCCGCAGCAAATGGCCCCCCTACAGCCA
+ATCTTGTCGATGCAAGACTTGGAAAGACAACAACGTCAAATGCAGCAACAGTTTATGAAT
+TTCCACGCCATGGGTCATCCACAGGGTCTCCCACAGGGTCCGCCTCAGCAGCAATTTCCA
+ATGCAGCCTGCGTCGGGTCAACCAGGTCCCTCACAATTTGCGCCTCCACCTCCACCTCCT
+GGCGTTAATGTGAATATGAATCAAATGCCAATGGGTCCTGTACAAGTTCCAGTTCAAGCT
+TCGCCTTCACCCATCGGTATGTCCAACACTCCTTCTCCAGGCCCTGTGGTTGGCGCAACT
+AAAATGCCTCTGCAAAGTGGACGCAGATCGAAGAGAGATTTGTCGCCTGAAGAGCAAAGA
+CGTTTGCAGATTCGTCATGCCAAAGTGGAGAAAATCTTGAAATACTCAGGTTTAATGACT
+CCTCGTGATAAGGACTTCATCACCAGATATCAGTTGTCTCAAATTGTCACTGAGGACCCT
+TACAATGAGGATTTCTACTTCCAGGTCTACAAGATTATCCAAAGAGGCGGTATCACGTCC
+GAATCCAACAAAGGTTTGATTGCTAGGGCGTATTTGGAACATTCTGGACACAGACTCGGT
+GGTCGCTATAAGAGAACCGATATTGCCCTACAGAGAATGCAAAGTCAAGTAGAAAAGGCT
+GTCACTGTGGCTAAGGAAAGACCTTCTAAGTTGAAGGATCAACAAGCGGCTGCTGGTAAC
+TCTAGCCAGGATAATAAGCAAGCAAACACGGTTCTGGGCAAAATCTCTTCCACTTTGAAC
+AGCAAGAATCCAAGAAGACAACTGCAGATCCCCAGACAACAGCCTTCTTCTGACCCCGAT
+GCGCTAAAAGACGTCACTGACTCTCTGACCAACGTGGACTTGGCCTCTTCAGGGTCCTCC
+TCTACGGGCTCTTCTGCCGCTGCTGTTGCTTCTAAGCAAAGAAGAAGATCTTCATACGCG
+TTCAACAACGGTAATGGTGCCACAAATTTGAACAAATCTGGGGGCAAAAAATTCATTCTT
+GAGTTAATTGAAACAGTTTATGAAGAGATTTTAGACTTGGAAGCTAACTTGAGGAATGGC
+CAGCAAACTGACAGCACTGCAATGTGGGAGGCCCTTCACATCGACGACAGTTCATATGAC
+GTAAACCCTTTCATTTCGATGCTATCATTTGATAAAGGTATCAAGATTATGCCTAGAATT
+TTTAATTTCTTGGATAAGCAGCAAAAATTGAAAATCCTGCAAAAAATCTTCAATGAATTA
+TCACACTTGCAAATCATCATATTGAGTTCCTACAAGACTACACCAAAACCAACTTTGACA
+CAATTGAAGAAAGTCGATCTGTTCCAAATGATCATATTAAAGATCATTGTCTCGTTTTTG
+TCTAATAACTCCAATTTTATCGAAATTATGGGTCTGTTGCTACAGTTAATCAGAAACAAC
+AACGTTTCGTTCTTGACCACCTCCAAAATTGGTCTAAATTTGATCACCATTTTGATTTCT
+CGTGCCGCATTAATCAAGCAAGATTCATCAAGATCTAATATTCTTTCCTCTCCTGAAATC
+TCCACATGGAATGAGATTTATGATAAATTATTCACTTCATTGGAAAGTAAGATTCAGCTG
+ATTTTCCCTCCAAGGGAATATAACGTCCACATCATGCGTTTACAAAATGACAAGTTTATG
+GATGAAGCATACTTTGGCCAGTTCCTAGCTAGTTTAGCACTAAGTGGAAAGCTAAACCAC
+CAGAGAATCATTATTGATGAAGTACGTGATGAAATCTTTGCCACTATTAACGAGGCGGAG
+ACCTTACAAAAGAAAGAGAAAGAATTGAGTGTATTACCTCAGAGGTCTCAAGAATTAGAC
+ACAGAGTTAAAATCTATTATTTATAATAAAGAGAAACTATACCAAGATTTGAATTTGTTC
+CTAAACGTTATGGGGTTGGTGTATCGCGATGGTGAAATATCAGAACTAAAGTAA
+>SRB8 4284 residues Pha 0 Code 0
+ATGAATAACGGTTCTGGTCGATACTTGCTGACTCCCCCAGATGATCTTCACCCCTATGTG
+CCAAGCTCGAAACCTCAGGAACAAGTATACCCTGATTTCAAGCCTTGGGAGCACACTGCA
+GCAGAAGATCAAATCCTAGCAAACTTTGTGGCTAAGGGCTTTTACCATACACCAATGGTA
+AATTTCGAGTCCATATCTGCGAGATCATCTGTTCATGAATCATTAGTCACTCAATCCAAC
+ATTCTTTCCCAGCAATTCGACAAAATTATCAAGATTAGAGAAGACCACATTAATAAGATC
+CCCTCAAATTCCACGACGACATTACACGGGCCTGGTTTTCAGTTGCCTAATAGAATAACC
+CTTACTGATCATAGAAAGGAAACGTGGTTGCATGAATTGAGTTCGTCTCACACTTCGCTG
+GTCAAAATTGGCAAGTTTATACCTCACGGCTTGAAAAGAAGGCAAGTCATCGAGCAGTGC
+TATTTAAAATTTATACCATTGAAAAGGGCGATTTGGTTGATAAAGTGCTGCTATTTTATC
+GAATGGAAATCGAACCACAAAAAGAAGAGGTCAAATGCTGCTGGGGCAGATGATGCCATT
+TCCATGCACCTGCTAAAGGACTGGACGGATACCTTTGTATACATCCTGGAAAAGCTCATC
+TTTGATATGACAAATCACTATAACGATTCTCAACAACTGCGTACGTGGAAGAGGCAGATT
+TCTTATTTTTTAAAACTTTTGGGGAATTGCTACTCACTAAGATTGATCAATAAGGAAATC
+TTTCATCATTGGCTTGTAGAGTTTATAAATAAGATGGAAAACTTCGAATTTTTGCCATTA
+TCTTTACATATTTTGATGATTTTTTGGAACGACATCTGCCAAATTGATACAAATGCTCCT
+GTTGCGGCTACAATAACATCAAGTCAAAAAGAGCCCTTCTTTCTGGTAACAAAAATCACT
+GATATGCTATTGCACAAATATTATATTGTTTCCAGCAGCAAATCAATGATAAATGACGAG
+AACTACATCATCAATGATATAAAGAAAAACAACAAGATAAAGTTGAATATTCTCAAAATA
+TTATCCAGTTTAATTTTGAAAATTTTTCAAGAACAATCTTTAGAGGTGTTTATATTTCCC
+ACATCTAACTGGGAAATTTACAAGCCCTTACTTTTTGAAATAGTCTCAAACGCCGACACT
+AATCAAAATTCTGATATGAAGAAAAAATTAGAGTTAATTAGTTACAGAAACGAGTCATTG
+AAGAATAATTCTTCTATACGAAACGTAATAATGTCTGCCAGCAACGCAAATGACTTTCAA
+TTAACTATCGTCACCTGTAAACAATTTCCAAAACTATCATGCATTCAATTAAATTGTATA
+GATACTCAGTTCACCAAGCTACTGGACGATAACCCTACAGAATTCGATTGGCCCACTTAC
+GTTGACCAAAATCCCCTTACAATGCATAAAATTATTCAATTAATTCTCTGGTCCATACAT
+CCATCAAGGCAATTTGATCACTATGAATCTAATCAACTGGTAGCGAAATTATTACTATTG
+CGAATAAATTCAACAGATGAGGATTTGCACGAATTCCAGATAGAAGATGCCATTTGGTCA
+TTGGTTTTCCAATTAGCCAAAAATTTTTCGGCCCAAAAGAGGGTGGTATCATATATGATG
+CCTTCTTTGTATCGCCTGCTTAATATACTAATTACTTATGGCATCATTAAGGTCCCTACG
+TATATCAGAAAGCTAATCAGTTCCGGCCTACTTTATCTCCAAGATTCCAATGATAAGTTT
+GTGCATGTCCAGCTGTTAATTAACTTGAAAATTTCACCGTTGATGAAAAGTCAATACAAT
+ATGGTATTGAGGAACGTTATGGAATATGACGTTAAATTTTATGAAATTTTTAATTTCGAC
+CAACTCGTGGAAATCACAGAACAAATCAAAATGCGAATACTCTCCAATGATATAACTAAT
+TTGCAACTGTCGAAAACTCCTCTGAGCATTAAAATCATGGTTGCAGAATGGTACTTATCA
+CATTTATGTTCCGGTATTTTATCTAGTGTTAACCGCACAGTGTTGCTAAAAATATTCAAG
+ATTTTTTGTATCGATCTGGAGGTTTTCCACCACTTTTTTAAGTGGATCGAGTTTATTGTC
+TACCATCAATTGCTAAGTGATATAGAATCTCTGGAGGCATTGATGGACATCTTGCTATGC
+TACCAAAAATTGTTCTCACAATTCATTAATGACCATATTCTTTTTACGAAGACGTTCATA
+TTCATTTACAAGAAAGTTTTGAAAGAAAAAGACGTGCCTGCTTATAATGTGACTTCATTT
+ATGCCATTCTGGAAATTTTTTATGAAAAACTTCCCTTTTGTTTTAAAGGTGGATAACGAT
+TTAAGGATTGAGTTACAATCTGTTTACAATGATGAGAAATTGAAAACTGAGAAGCTGAAG
+AATGATAAATCAGAAGTCTTGAAGGTGTATTCCATGATCAATAATTCAAACCAAGCTGTT
+GGACAGACTTGGAATTTTCCCGAGGTGTTTCAAGTAAACATCAGGTTTCTACTACACAAC
+TCCGAGATCATTGATACAAATACAAGCAAACAGTTCCAGAAAGCACGAAACAATGTCATG
+CTTTTGATTGCCACTAACTTGAAGGAGTACAATAAATTTATGTCCATTTTCTTGAAAAGG
+AAAGACTTTACTAACAAAAATTTAATTCAATTGATCTCTCTAAAACTTCTAACTTTTGAA
+GTGACGCAGAATGTGTTGGGGCTCGAGTATATTATTCGATTATTACCAATAAACTTGGAA
+AATAATGACGGCTCATATGGTCTGTTTTTGAAGTATCATAAAGAACAATTCATAAAGTCA
+AATTTTGAGAAAATTTTACTTACATGTTATGAATTAGAAAAAAAATATCATGGCAACGAA
+TGTGAAATAAATTATTATGAGATCCTATTGAAAATTTTAATAACTTATGGGTCATCTCCC
+AAATTACTTGCAACATCTACAAAAATCATTATGTTGTTATTGAATGATAGCGTGGAAAAC
+TCATCTAATATTTTGGAGGATATTTTGTACTACTCAACTTGTCCGTCGGAAACCGATCTT
+AACGATATTCCATTGGGTAGTGGACAACCAGACAATGACACTGTTGTAACCAACGATGAT
+AAAAGTGACGATGATGATCACACAGTCGACGAAATTGATCATGTAGAATATTACGTTATG
+ATGGACTTTGCCAATCTTTGGGTTTTCCAAGCGTTTACCTGTTTCTGCATCAAAAAAATC
+ATGGAGAATAATGAGCCAGCAATGGCAATGGAAGACTTGAAGAACTTCATATTCCAAATT
+ATCGAAATAACTAATTCTAATGATTTATGTTCACAAATATTTGACCAACTGAAGGATATG
+CAGACCATTGAGATGATAACCCAAATAGTGGAGAAAGATTTCTGCACTTCTTGTTTGCAA
+AACAACAACCAAAAGATAGATGATAATTACATCGTTGTGGTGATCGAGATTATAACGTCA
+TTATCGATGAGGTTTCAAAGAGAAACTTCTGGTATGATAGTTATTTCCATGGAGAACTAT
+CATTTACTAATAAAGATCATAAGACAATTAAGTGAACTGAACGAAGGAAATTTATCTAAG
+AGAGAAATCCAAATAGATGCCGTCTTGAAAATTTTTAGCTTTCATCAGGATTCCATTTTC
+CAACGCATCATCGCTGATTTATCAGCTGATAAACCCACAAGTCCATTCATTGATAGCATA
+TGCAAGCTGTTTGATAAAATATCATTTAATTTAAGATTGAAGCTGTTCTTGTACGAAATT
+TTGTCTTCATTGAAATCATTCGCCATCTATTCATCCACAATTGATGCCCCAGCATTCCAC
+ACAAGCGGTAAGGTCGAACTACCGAAGAAATTGCTGAACTTACCACCATTCCAAGTGTCC
+TCTTTCGTTAAGGAAACAAAACTTCATAGTGGCGACTACGGGGAAGAAGAAGATGCAGAC
+CAAGAAGAATCGTTTAGTTTAAATTTAGGAATCGGCATAGTTGAAATAGCGCACGAAAAC
+GAACAGAAATGGCTCATTTATGACAAGAAAGATCATAAATATGTCTGCACATTTTCCATG
+GAGCCGTACCACTTCATCTCCAACTATAATACCAAGTACACAGATGACATGGCTACAGGC
+AGTAATGATACGACTGCGTTTAACGATTCCTGTGTAAACCTGAGTCTTTTTGATGCTCGG
+TTTGAGAGGAAAAATCCACATTGA
+>YCX3 384 residues Pha 0 Code 0
+ATGTTGTTCTATAAGCCTGTGATGAGGATGGCGGTGAGACCGCTAAAAAGCATAAGATTC
+CAGTCCTCATACACCAGTATTACTAAATTGACGAACCTAACAGAATTTAGGAATTTGATC
+AAGCAAAATGATAAACTAGTCATCGATTTTTATGCTACTTGGTGTGGCCCCTGTAAGATG
+ATGCAACCACACTTAACGAAATTAATTCAGGCTTATCCAGATGTAAGATTTGTCAAGTGC
+GACGTGGACGAATCACCAGATATTGCCAAAGAGTGTGAAGTGACGGCTATGCCCACCTTT
+GTTCTTGGCAAGGATGGCCAACTCATCGGCAAGATCATTGGAGCTAACCCTACTGCTTTA
+GAGAAGGGAATCAAAGATCTATAA
+>TUP1 2142 residues Pha 0 Code 0
+ATGACTGCCAGCGTTTCGAATACGCAGAATAAGCTGAATGAGCTTCTCGATGCCATCAGA
+CAGGAGTTTCTCCAAGTCTCACAAGAGGCAAATACCTACCGTCTTCAAAACCAAAAGGAT
+TACGATTTCAAAATGAACCAGCAGCTGGCTGAGATGCAGCAGATAAGAAACACCGTCTAC
+GAACTGGAACTAACTCACAGGAAAATGAAGGACGCGTACGAAGAAGAGATCAAGCACTTG
+AAACTAGGGCTGGAGCAAAGAGACCATCAAATTGCATCTTTGACCGTCCAGCAACAGCGG
+CAACAGCAACAGCAGCAACAGGTCCAGCAGCATTTACAACAGCAACAGCAGCAGCTAGCC
+GCTGCATCTGCATCTGTTCCAGTTGCGCAACAACCACCGGCTACTACTTCGGCCACCGCC
+ACTCCAGCAGCAAACACAACTACTGGTTCGCCATCGGCCTTCCCAGTACAAGCTAGCCGT
+CCTAATCTGGTTGGCTCACAGTTGCCTACCACCACTTTGCCTGTGGTGTCCTCAAACGCC
+CAACAACAACTACCACAACAGCAACTGCAACAGCAGCAACTTCAACAACAGCAACCACCT
+CCCCAGGTTTCCGTGGCACCATTGAGTAACACAGCCATCAACGGATCTCCTACTTCTAAA
+GAGACCACTACTTTACCCTCTGTCAAGGCACCTGAATCTACGTTGAAAGAAACTGAACCG
+GAAAATAATAATACCTCGAAGATAAATGACACCGGATCCGCCACCACGGCCACCACTACC
+ACCGCAACTGAAACTGAAATCAAACCTAAGGAGGAAGACGCCACCCCGGCTAGTTTGCAC
+CAGGATCACTACTTAGTCCCTTATAATCAAAGAGCAAACCACTCTAAACCTATCCCACCT
+TTCCTTTTGGATCTAGATTCCCAGTCTGTTCCCGATGCTCTGAAGAAGCAAACAAATGAT
+TATTATATTTTATACAACCCGGCACTACCAAGAGAAATTGACGTTGAGTTACACAAATCT
+TTGGATCATACTTCAGTTGTTTGTTGCGTGAAGTTCAGTAACGATGGTGAATACTTAGCC
+ACAGGCTGCAACAAAACTACTCAAGTGTATCGCGTTTCAGATGGTTCTCTGGTGGCCCGT
+CTATCTGACGATTCTGCTGCCAATAACCATCGAAATTCGATCACTGAAAATAACACCACC
+ACGTCCACGGATAACAATACAATGACAACCACTACTACCACCACAATTACTACCACAGCG
+ATGACTTCGGCAGCAGAATTGGCAAAAGATGTGGAAAACCTGAACACTTCGTCTTCCCCA
+TCATCCGACTTGTATATCCGTTCAGTGTGTTTTTCTCCAGATGGGAAATTTTTGGCAACA
+GGTGCTGAAGACAGACTGATTAGAATTTGGGATATTGAAAATAGAAAGATTGTTATGATT
+CTTCAAGGCCACGAACAAGATATTTATTCATTGGACTACTTTCCCTCAGGTGACAAATTA
+GTCTCCGGTTCTGGTGACCGTACCGTTCGTATTTGGGACTTACGTACAGGCCAGTGTTCA
+TTGACTTTATCCATTGAAGATGGTGTTACCACCGTCGCTGTATCACCAGGTGATGGTAAA
+TACATCGCTGCTGGTTCTCTAGATCGTGCTGTGAGAGTTTGGGATTCCGAGACCGGATTC
+TTGGTGGAAAGACTAGATTCGGAAAACGAATCCGGTACAGGCCACAAGGACTCTGTTTAT
+AGCGTTGTCTTCACTAGAGATGGACAAAGCGTTGTATCCGGCTCATTAGATAGATCTGTT
+AAGCTCTGGAATTTGCAGAATGCAAACAACAAGAGCGATTCGAAAACTCCAAATTCCGGC
+ACTTGTGAAGTTACGTATATCGGGCATAAAGACTTTGTATTGTCCGTGGCCACCACACAA
+AATGATGAGTACATCTTGTCCGGTTCCAAAGATCGTGGTGTCCTGTTTTGGGATAAGAAA
+TCCGGCAATCCGTTATTGATGTTGCAAGGTCATAGGAATTCAGTTATATCTGTGGCTGTG
+GCAAACGGGTCTCCGCTGGGTCCAGAATATAACGTTTTTGCTACTGGTAGCGGTGATTGT
+AAAGCAAGGATTTGGAAGTATAAAAAAATAGCGCCAAATTAA
+>YC16 462 residues Pha 0 Code 0
+ATGGTTACGTTCAACTGTGAGGTGTGTAATGATACTGTGCCCAAGAAGAATACCGAAAAG
+CATTATTATAGATGTCCTAACGCGTACTATACATGCATAGATTGCTCCAAGACGTTTGAA
+GATGGCGTGAGTTACAAGAATCACACGTCTTGCATCAGCGAGGACGAGAAGTACCAGAAA
+GCGTTGTACAAGGGCAACAAGAAGCAGAAGCAGAAGCAGCAGCAGAAGCAGCAGCAGAAG
+CAGCACCAGCACCAGCCAGTGGCAACTCCTGCAAAGAAAGTGGAGAAGCCTGTGATCAAG
+AAGGCAGAGAAAGTGGAAAAGACCTCGAACGGTATCGAGCTTCACAAGGGCAAGTCGTTG
+TACAAAATTTTGAAAACCATGAAGGATAAAGGGGCAAAAAAGACCTTCTTGAAAAGTCTG
+GTTGTGGATTCTGAGGGGCAAATCAGGTATGCAAAGGAATAA
+>ABP1 1779 residues Pha 0 Code 0
+ATGGCTTTGGAACCTATTGATTATACTACTCACTCGAGAGAGATCGACGCAGAGTACCTG
+AAGATTGTCAGAGGCTCCGATCCTGACACCACCTGGTTGATTATTTCACCCAATGCGAAA
+AAAGAATACGAACCTGAGTCTACCGGTTCCTCCTTTCACGATTTCTTGCAATTGTTTGAT
+GAAACCAAGGTCCAGTACGGACTGGCACGTGTGTCCCCACCAGGGTCAGACGTTGAGAAG
+ATTATTATCATTGGTTGGTGTCCTGATTCTGCGCCATTGAAGACAAGGGCCTCTTTCGCC
+GCCAATTTTGCTGCAGTTGCTAATAATCTGTTCAAGGGTTACCACGTTCAAGTTACCGCC
+AGAGACGAGGACGATCTTGACGAAAATGAACTGTTGATGAAAATCAGTAACGCGGCCGGT
+GCCCGTTATTCTATTCAGACTTCCTCCAAGCAACAGGGGAAGGCTTCCACTCCTCCCGTG
+AAGAAATCCTTCACACCTTCCAAGAGCCCTGCTCCAGTTTCTAAGAAGGAACCAGTCAAG
+ACTCCTTCCCCAGCACCTGCTGCTAAGATTTCTTCCCGTGTTAACGACAACAATGACGAC
+GACGATTGGAATGAGCCTGAATTAAAGGAACGCGACTTCGATCAGGCTCCCCTGAAACCA
+AATCAATCATCTTACAAACCAATTGGCAAAATCGACTTGCAAAAAGTGATTGCTGAAGAA
+AAGGCTAAGGAGGACCCACGTCTTGTTCAAAAGCCAACCGCTGCTGGTTCCAAGATTGAT
+CCTAGTTCTGATATCGCTAATTTAAAGAACGAATCAAAATTAAAGAGGGACTCCGAGTTT
+AACTCCTTTTTGGGCACCACTAAACCCCCCTCCATGACGGAATCTTCATTAAAGAATGAT
+GATGATAAAGTCATTAAGGGTTTTAGAAACGAGAAATCACCTGCTCAATTATGGGCCGAA
+AGAAAGGCAAAGCAAAACAGCGGCAACGCCGAAACTAAGGCTGAGGCACCAAAACCTGAA
+GTTCCAGAAGATGAGCCTGAAGGTGAACCTGACGTCAAAGATTTGAAATCAAAATTTGAA
+GGATTGGCCGCTTCAGAAAAAGAGGAGGAAGAAATGGAAAACAAATTTGCTCCTCCTCCA
+AAGAAATCAGAACCAACTATTATCTCACCAAAACCCTTCTCCAAGCCACAAGAACCTGTG
+AAAGCTGAAGAAGCCGAGCAGCCTAAGACTGATTACAAGAAGATCGGCAACCCATTACCC
+GGTATGCACATTGAAGCGGATAATGAGGAAGAACCAGAAGAGAATGATGATGACTGGGAT
+GATGATGAAGACGAGGCTGCTCAACCTCCTTTGCCTTCGAGGAATGTTGCGTCAGGAGCA
+CCAGTGCAAAAAGAAGAGCCTGAACAAGAAGAGATCGCCCCAAGCTTACCTTCTAGAAAC
+TCGATCCCAGCTCCAAAACAAGAAGAAGCACCTGAACAAGCACCTGAAGAAGAAATTGAA
+GAAGAAGCTGAGGAAGCCGCTCCACAGCTGCCATCAAGAAGCTCTGCAGCTCCTCCTCCG
+CCTCCAAGACGAGCAACTCCAGAGAAAAAGCCAAAGGAAAATCCTTGGGCCACAGCAGAA
+TATGATTACGATGCTGCAGAAGATAACGAACTGACCTTTGTGGAAAATGACAAGATTATC
+AATATTGAATTTGTCGACGATGACTGGTGGCTAGGGGAACTAGAGAAAGACGGCTCAAAA
+GGTCTCTTCCCCAGCAATTATGTGTCTTTGGGCAACTAG
+>KIN82 2181 residues Pha 0 Code 0
+ATGACTCAGCAAGAATACCGTTCCCCCTCACAACGCTTATCCAAGGGGAGGAGCATGTCG
+CTACCCAAAATATTTGCTCGTAATTTGAGATCTCTGCAAAACAATGCACCTCCTGGCAAA
+AACATCAATGTCAATTGTTTGAACGTCAATTCTTGTTCGTTGTCCGCAAGCCCAAGCTCA
+CAAATTAATATGGCTTGTAATGGAAACAAGCAAGATCTTCCCATACCGTTTCCCCTGCAT
+GTAGAATGCAACGATAGCTGGTCAAGCTCCAAACTTAACAAGTTCAAATCAATGTTTAAT
+CATAACAGATCAAAGAGCAGTGGTACTACAGATGCGTCAACTTCAGAAAAAGGTACGCAT
+AAGCGTGAACCCCGGTCGACGATACATACAGAGCTGTTACAAAGTTCCATTATCGGTGAG
+CCAAATGTCCATAGTACTACAAGTAGCACACTTATACCCAATGAGGCGATATGCTCCACA
+CCTAATGAGATCTCAGGTAGCTCTTCTCCGGACGCGGAGTTATTTACCTTTGACATGCCC
+ACAGACCCGTCATCCTTCCACACTCCTAGCTCCCCAAGTTATATAGCAAAGGACAGTAGA
+AACCTGAGTAATGGATCTTTGAATGATATTAACGAAAATGAAGAGCTCCAAAATTTCCAT
+AGAAAAATCAGCGAAAATGGCAGTGCCTCCCCCCTGGCTAACTTGTCATTATCCAATTCA
+CCAATTGATTCCCCAAGGAAAAATAGCGAAACCAGAAAGGATCAAATACCTATGAACATA
+ACACCACGTTTAAGGAGGGCCGCTTCCGAACCGTTCAATACGGCAAAGGATGGGTTAATG
+CGGGAAGATTACATTGCCTTGAAACAACCTCCAAGCTTGGGAGATATTGTAGAACCGAGG
+AGATCTCGTCGTTTAAGAACCAAGTCATTCGGTAACAAGTTCCAAGACATTACTGTCGAA
+CCTCAATCCTTCGAAAAAATTAGACTACTTGGCCAAGGTGACGTAGGTAAAGTGTATTTA
+GTGAGGGAACGCGATACCAACCAGATATTCGCCCTGAAAGTTTTGAATAAACATGAGATG
+ATCAAGAGGAAGAAAATTAAACGAGTACTCACTGAACAGGAAATTCTCGCGACAAGTGAT
+CATCCATTTATTGTGACACTGTATCATTCCTTTCAAACCAAAGACTATTTGTATCTCTGT
+ATGGAATACTGCATGGGAGGGGAATTCTTTAGAGCCTTACAAACAAGAAAAAGTAAATGC
+ATTGCAGAAGAAGATGCGAAGTTTTACGCCAGTGAAGTAGTAGCAGCTTTGGAATATTTA
+CACCTACTGGGCTTCATATACAGAGATTTGAAACCCGAAAACATATTACTGCATCAATCT
+GGTCATGTCATGCTTTCTGACTTTGATTTATCCATCCAAGCAACGGGATCAAAAAAACCC
+ACCATGAAAGACTCTACGTATTTAGATACAAAAATTTGTTCAGATGGATTCAGAACTAAT
+TCCTTTGTTGGTACTGAAGAGTATTTAGCTCCAGAAGTAATCAGAGGGAATGGCCACACT
+GCAGCAGTAGACTGGTGGACTTTAGGAATATTGATTTACGAGATGCTATTTGGCTGTACT
+CCATTTAAAGGAGATAATTCAAATGAAACATTCTCTAACATTTTAACCAAGGACGTCAAA
+TTTCCACATGATAAGGAAGTTTCGAAGAATTGTAAAGACCTGATAAAGAAACTACTAAAC
+AAAAACGAGGCAAAAAGGCTTGGTTCCAAATCAGGAGCTGCAGACATAAAGAGACATCCC
+TTCTTCAAAAAAGTTCAGTGGTCGTTCTTAAGAAACCAAGACCCCCCTCTAATACCTGCA
+TTAAATGATAACGGCTGCGAACTTCCTTTTATATTGTCTTGCAATAAACACCCGAAAAGG
+AACTCAGTGAGTGAACAGGAAACCAAAATGTTCTGTGAGAAAGTTGCAAACGATGATGAA
+ATTGATGAGGCTGATCCATTCCATGATTTTAATTCTATGAGTTTAACGAAGAAAGATCAC
+AATATCTTAACCTACTCTGAAAATTATACTACGGAAAAATTCTATACAAAGCAACTTGTA
+CAAGGCCAAGGCATAACAGCTCACATAGAAGTTTCTTTAAAGACATCATACCTGAACTAT
+AACATGTTTACAGAAAGATAA
+>MSH3 3144 residues Pha 0 Code 0
+ATGGTGATAGGTAATGAACCTAAACTGGTACTTTTGAGAGCCAAAAGCAGTGCAAATAGA
+TTTATTTTGTTGAATCTATTAACAATAATGGCGGGACAACCCACAATAAGCAGGTTTTTC
+AAGAAGGCGGTAAAATCAGAGCTGACGCATAAGCAAGAACAAGAAGTTGCGGTTGGAAAT
+GGCGCTGGTAGCGAATCCATCTGCCTTGACACTGATGAAGAGGACAATTTATCTTCTGTT
+GCAAGCACAACAGTAACTAATGATAGCTTTCCACTCAAAGGCAGTGTTTCTTCCAAGAAT
+TCGAAAAATTCAGAAAAGACTAGTGGTACTTCGACAACATTTAATGATATTGACTTTGCT
+AAGAAATTGGATAGGATTATGAAAAGACGAAGTGATGAAAATGTTGAGGCTGAAGATGAT
+GAGGAAGAGGGTGAGGAAGATTTCGTAAAAAAAAAAGCCAGAAAGTCCCCTACAGCGAAA
+CTTACTCCCTTGGACAAACAGGTGAAGGACCTGAAAATGCATCATAGAGATAAAGTGCTT
+GTTATTAGAGTAGGCTACAAGTACAAATGTTTTGCAGAGGATGCAGTAACGGTTAGCAGA
+ATACTTCACATCAAACTTGTGCCTGGAAAATTGACTATCGATGAGTCTAATCCTCAAGAT
+TGCAATCATAGGCAGTTTGCGTACTGTTCTTTCCCGGATGTCAGATTAAACGTTCACCTA
+GAGAGACTTGTGCATCATAATTTAAAGGTTGCCGTGGTAGAGCAAGCAGAAACAAGCGCT
+ATTAAGAAGCATGATCCAGGTGCCAGCAAATCAAGCGTTTTTGAAAGAAAGATTTCAAAT
+GTCTTTACCAAAGCTACATTTGGTGTTAATTCCACCTTTGTCCTTAGGGGGAAACGTATT
+CTCGGTGATACAAACAGTATATGGGCTTTGTCCCGTGACGTACATCAGGGAAAGGTGGCT
+AAATATTCCTTAATTTCTGTCAATTTAAATAACGGGGAAGTCGTGTATGATGAATTTGAA
+GAGCCTAATCTTGCTGATGAGAAACTACAGATACGAATCAAATATTTACAGCCCATAGAA
+GTACTGGTAAATACAGATGATCTTCCATTACATGTAGCGAAATTTTTCAAAGATATTTCA
+TGTCCTTTAATACACAAGCAGGAGTATGATTTGGAAGATCATGTAGTTCAGGCAATAAAA
+GTAATGAATGAGAAAATTCAACTCTCGCCGTCTCTCATACGCTTAGTTTCTAAGTTATAT
+TCGCATATGGTTGAGTACAATAATGAGCAGGTGATGTTGATTCCTTCTATCTATTCGCCC
+TTCGCATCAAAAATACATATGTTACTTGATCCTAACTCCCTGCAAAGTTTGGACATTTTT
+ACCCATGATGGTGGTAAAGGTTCTTTGTTTTGGTTATTGGACCATACAAGGACATCGTTT
+GGATTAAGAATGTTGAGAGAATGGATTCTCAAACCTTTGGTTGATGTACACCAAATTGAA
+GAGCGGCTTGATGCCATTGAGTGCATTACATCCGAAATCAACAACAGTATATTTTTTGAA
+TCGTTGAATCAAATGTTGAATCATACCCCTGACTTATTAAGAACTTTAAATCGCATAATG
+TATGGTACAACTTCTAGAAAAGAAGTCTATTTCTATTTAAAGCAAATAACTTCTTTCGTT
+GATCACTTCAAGATGCATCAATCTTACCTGTCAGAACATTTCAAGTCATCAGATGGAAGG
+ATAGGCAAACAATCTCCTTTACTTTTTAGACTATTTAGTGAATTGAATGAACTACTTTCT
+ACCACTCAGTTGCCTCATTTTTTGACCATGATCAACGTTTCTGCGGTAATGGAAAAAAAT
+TCAGATAAGCAAGTAATGGATTTTTTTAATTTAAATAACTATGATTGTTCAGAGGGTATA
+ATAAAAATTCAAAGGGAAAGCGAATCAGTACGGTCACAGTTAAAGGAAGAATTGGCAGAA
+ATACGAAAATATCTCAAACGTCCATATCTAAATTTTAGAGATGAAGTTGATTACTTAATC
+GAAGTGAAAAACTCGCAAATTAAGGACTTGCCAGATGATTGGATAAAAGTTAACAATACG
+AAGATGGTCAGTAGATTTACCACTCCCAGAACCCAGAAACTGACTCAAAAGCTAGAATAT
+TACAAGGACTTATTAATTCGGGAATCTGAACTACAGTATAAAGAATTCTTGAACAAAATT
+ACGGCAGAATATACAGAGCTCCGTAAAATTACACTCAATTTGGCGCAGTATGACTGTATT
+TTGTCGTTAGCAGCCACATCATGCAACGTAAATTATGTTAGACCAACTTTTGTGAATGGT
+CAACAAGCCATAATCGCAAAAAATGCAAGAAATCCAATTATCGAGTCGCTGGATGTTCAT
+TATGTACCAAATGATATCATGATGTCCCCAGAAAACGGTAAAATCAATATTATAACGGGG
+CCGAATATGGGTGGGAAATCATCTTATATTAGACAAGTGGCACTGCTTACTATAATGGCA
+CAGATCGGCTCATTTGTCCCCGCAGAAGAGATCAGATTAAGCATATTTGAAAACGTACTC
+ACTCGAATCGGTGCGCACGATGATATTATAAACGGTGATTCTACTTTTAAAGTGGAAATG
+CTTGATATCCTACACATCTTGAAAAATTGCAATAAACGGTCTTTACTATTATTAGACGAA
+GTGGGAAGAGGTACTGGCACGCACGATGGTATAGCAATTTCTTATGCTTTAATAAAGTAT
+TTTTCTGAGTTAAGTGACTGCCCCTTGATATTATTTACTACCCATTTTCCCATGCTGGGA
+GAAATCAAATCTCCGTTAATAAGGAATTATCATATGGATTACGTGGAAGAACAAAAAACT
+GGCGAGGACTGGATGAGTGTAATTTTTCTATATAAGTTAAAAAAGGGATTGACTTATAAT
+AGTTATGGGATGAATGTGGCGAAATTGGCACGCCTGGACAAAGATATTATAAATCGGGCA
+TTCAGTATTTCAGAAGAATTGCGGAAGGAATCCATTAACGAAGACGCGTTGAAATTATTC
+AGCTCTTTGAAAAGAATATTAAAAAGTGATAATATAACAGCAACGGATAAACTCGCGAAA
+TTACTATCATTGGATATCCACTGA
+>CDC39 6327 residues Pha 0 Code 0
+ATGCTATCGGCCACATACCGTGATTTGAACACAGCATCTAATTTAGAAACATCAAAGGAA
+AAACAGGCCGCTCAAATCGTCATTGCACAAATTAGTTTATTATTCACGACTCTTAACAAC
+GACAATTTTGAATCCGTGGAAAGAGAAATTAGACATATTTTAGACAGGTCGTCCGTAGAT
+ATTTACATAAAAGTTTGGGAACGATTATTAACCTTAAGTTCTCGGGATATTTTACAAGCG
+GGAAAATTTTTACTTCAAGAAAATCTACTACACAGACTACTATTAGAATTTGCGAAGGAT
+TTACCGAAGAAAAGCACAGACCTTATTGAGCTTTTGAAAGAACGAACCTTCAATAACCAG
+GAGTTTCAAAAACAAACAGGAATTACATTATCACTTTTCATTGATCTATTTGATAAATCT
+GCAAACAAGGACATTATAGAGTCACTTGACCGCTCCTCTCAGATTAACGATTTCAAGACA
+ATTAAGATGAATCATACAAATTATTTAAGGAATTTTTTTCTTCAAACCACACCAGAAACA
+CTAGAGTCCAATCTACGCGACTTATTGCATTCCTTGGAAGGTGAAAGTCTAAATGACTTA
+TTAGCTCTTTTACTGTCCGAAATACTTTCACCTGGGTCTCAGAATTTACAAAATGATCCC
+ACACGGAGTTGGTTGACACCTCCGATGGTTTTAGACGCAACGAACCGTGGGAACGTTATA
+GCAAGATCTATAAGTTCTCTGCAAGCCAACCAGATAAATTGGAATCGTGTGTTTAATTTA
+ATGTCAACAAAGTATTTCTTGAGCGCACCATTGATGCCTACTACAGCATCTTTGAGTTGC
+TTATTTGCAGCATTGCACGATGGTCCAGTTATTGATGAATTTTTCAGTTGCGACTGGAAA
+GTTATTTTCAAACTAGATTTGGCCATTCAACTTCATAAGTGGTCGGTACAGAATGGTTGC
+TTTGACTTATTAAATGCAGAAGGTACCAGGAAAGTTTCTGAAACCATCCCAAACACAAAG
+CAATCTTTACTCTACTTATTATCCATTGCATCATTGAATTTAGAATTGTTCCTACAAAGG
+GAGGAATTGTCTGATGGTCCTATGCTAGCTTATTTTCAAGAGTGCTTCTTTGAAGATTTC
+AACTACGCCCCTGAATATCTTATTTTAGCATTAGTCAAAGAAATGAAGCGGTTCGTTTTA
+TTGATAGAAAACAGGACAGTCATAGACGAAATACTTATTACCTTATTGATTCAAGTGCAT
+AATAAATCACCGTCATCGTTCAAGGACGTTATTTCTACAATAACCGATGATTCTAAAATC
+GTAGATGCAGCAAAAATCATAATCAACTCGGATGACGCACCTATTGCCAACTTTTTAAAA
+TCGTTGTTAGATACGGGAAGATTAGATACGGTCATTAATAAACTTCCTTTCAATGAAGCT
+TTTAAAATTTTGCCATGCGCAAGACAAATTGGTTGGGAGGGGTTCGATACTTTCTTAAAA
+ACAAAAGTTTCTCCATCTAATGTCGATGTAGTGCTGGAATCACTAGAGGTTCAAACGAAA
+ATGACTGATACAAACACTCCATTTAGGTCATTAAAGACATTTGACTTATTCGCTTTTCAT
+TCATTAATTGAAGTACTGAACAAATGCCCACTAGATGTTCTCCAATTACAAAGGTTTGAA
+TCCTTGGAATTTTCCTTATTAATTGCATTTCCTAGATTGATCAATTTTGGTTTTGGACAC
+GATGAAGCTATTTTAGCCAATGGTGACATCGCAGGGATTAATAATGATATTGAAAAGGAG
+ATGCAGAACTATTTACAGAAAATGTATAGTGGTGAGTTAGCCATTAAAGATGTAATCGAA
+CTTCTGAGAAGGTTAAGAGATAGCGACTTGCCAAGGGACCAGGAAGTCTTCACATGTATT
+ACCCATGCCGTTATAGCAGAATCGACATTCTTCCAAGATTATCCATTGGATGCATTGGCT
+ACTACATCTGTTCTTTTTGGATCCATGATTCTCTTTCAACTGTTACGTGGATTCGTATTA
+GACGTCGCATTTAGGATAATCATGAGGTTTGCCAAGGAGCCTCCAGAGTCCAAGATGTTT
+AAGTTTGCTGTACAAGCTATTTATGCATTTAGGATACGTTTGGCCGAATATCCACAGTAT
+TGTAAGGACCTCTTGAGAGATGTTCCGGCTTTGAAGTCTCAGGCTCAAGTTTACCAATCT
+ATCGTCGAAGCTGCTACCCTAGCAAATGCTCCAAAGGAAAGGTCAAGACCCGTCCAGGAA
+ATGATCCCATTAAAATTTTTTGCTGTAGATGAAGTTTCATGTCAGATCAATCAAGAAGGT
+GCTCCTAAAGATGTCGTAGAAAAAGTTCTTTTTGTTCTCAACAACGTTACTCTGGCTAAC
+TTGAATAATAAGGTTGATGAATTGAAAAAAAGTTTGACACCAAATTATTTTTCTTGGTTT
+TCCACATATTTAGTTACGCAAAGGGCTAAAACAGAACCTAACTATCATGATCTTTATAGC
+AAGGTTATAGTTGCTATGGGGTCAGGGTTGCTACATCAGTTCATGGTCAACGTTACTTTG
+AGACAATTATTTGTCCTACTATCTACAAAAGACGAGCAAGCCATCGATAAAAAGCACCTA
+AAGAATTTGGCTTCATGGTTAGGATGTATCACATTAGCTTTGAATAAACCAATTAAACAC
+AAGAATATCGCATTCAGGGAAATGTTAATCGAAGCTTATAAGGAAAATAGACTTGAAATA
+GTTGTGCCTTTTGTAACAAAGATTTTACAAAGGGCTTCTGAATCAAAAATTTTCAAGCCT
+CCAAATCCCTGGACTGTTGGCATATTAAAGCTGTTGATTGAGTTGAACGAAAAAGCAAAC
+TGGAAATTAAGTTTGACTTTCGAAGTTGAGGTTTTATTAAAATCTTTTAATTTGACCACC
+AAATCTCTCAAGCCCTCGAATTTCATCAATACTCCGGAAGTTATAGAAACTTTATCCGGT
+GCTTTGGGATCAATCACTCTGGAGCAACAACAAACAGAGCAACAAAGGCAAATTATACTA
+ATGCAACAACACCAGCAACAGATGCTAATATATCAACAGAGACAACAACAACAACAACAA
+AGGCAACAACAACAACAACATCATATTAGTGCAAATACAATCGCAGACCAACAAGCGGCA
+TTTGGCGGCGAGGGTTCAATTTCACACGACAATCCTTTTAACAACTTACTTGGTTCTACT
+ATTTTTGTAACCCACCCTGACTTGAAGAGGGTATTTCAAATGGCTTTAGCCAAGTCAGTT
+CGCGAAATTTTGTTGGAAGTAGTCGAAAAGTCATCAGGAATTGCTGTTGTTACGACGACA
+AAAATAATACTTAAAGACTTTGCCACTGAAGTTGATGAGTCTAAGTTGAAGACGGCTGCA
+ATCATTATGGTAAGGCATTTGGCACAAAGTTTAGCTCGAGCTACTTCAATTGAACCATTG
+AAAGAAGGCATACGTTCTACTATGCAATCACTAGCACCGAATTTAATGTCTCTTTCTTCT
+TCACCTGCAGAGGAGCTTGACACGGCAATAAATGAAAATATTGGCATTGCTCTAGTTTTG
+ATTGAGAAAGCATCTATGGACAAGTCTACTCAAGATTTAGCAGACCAATTGATGCAAGCG
+ATTGCTATTCGTCGTTATCACAAGGAAAGAAGGGCAGACCAACCATTTATTACGCAAAAT
+ACCAATCCATATTCACTGTCTTTACCAGAACCTCTTGGTTTGAAAAACACTGGTGTTACT
+CCTCAACAATTCAGGGTATACGAAGAATTTGGTAAGAATATTCCAAACTTGGATGTTATT
+CCGTTTGCAGGATTGCCCGCTCACGCTCCACCGATGACTCAAAATGTGGGTTCAACTCAG
+CCTCAGCAACAACAAGCGCAAATGCCTACCCAAATCCTAACCTCCGAACAAATAAGAGCT
+CAACAACAACAGCAGCAATTACAGAAAAGCCGTTTGAATCAGCCATCCCAGTCGGCTCAA
+CCTCCAGGAGTGAATGTCCCAAATCCTCAAGGTGGGATTGCTGCAGTTCAATCAGATTTG
+GAACAGAATCAACGTGTTCTCGTTCACCTCATGGACATTTTAGTTTCTCAAATTAAAGAA
+AATGCTACGAAGAATAACTTAGCTGAATTAGGCGATCAAAACCAAATTAAAACCATCATT
+TTTCAAATTTTGACATTCATTGCAAAAAGCGCACAAAAGGATCAATTAGCTTTAAAGGTA
+TCCCAAGCTGTCGTTAATAGCCTTTTTGCCACTAGTGAGAGTCCTCTCTGCAGAGAAGTT
+TTGTCCCTACTTTTGGAAAAGTTATGTTCTTTATCCCTCGTTGCTAGAAAAGACGTTGTC
+TGGTGGTTAGTTTATGCCTTGGACAGTAGGAAATTCAATGTTCCCGTTATCAGATCCCTT
+CTAGAAGTTAATTTAATTGATGCTACAGAATTAGATAACGTTTTAGTTACTGCAATGAAA
+AATAAAATGGAGAACTCAACTGAATTTGCTATGAAATTAATTCAGAATACTGTCTTGTCT
+GATGATCCAATTTTGATGAGAATGGACTTCATTAAAACCTTAGAACACTTGGCCTCTTCG
+GAAGATGAAAATGTAAAGAAATTCATCAAAGAGTTCGAAGATACTAAGATAATGCCAGTG
+AGGAAAGGTACCAAAACCACAAGAACAGAAAAGCTTTACTTAGTATTTACGGAATGGGTA
+AAATTACTTCAAAGAGTTGAGAATAACGACGTAATCACAACTGTTTTTATCAAGCAATTA
+GTCGAAAAGGGTGTTATCAGCGATACTGATAATTTACTTACATTTGTCAAAAGTTCTCTT
+GAGCTATCAGTTTCTTCATTCAAAGAAAGTGACCCGACTGATGAGGTTTTCATCGCTATT
+GATGCTCTAGGATCGCTAATTATAAAATTGTTGATTTTACAGGGTTTCAAAGATGATACA
+AGAAGAGATTACATAAATGCAATATTTTCTGTGATCGTTTTAGTGTTTGCTAAGGATCAT
+AGCCAAGAGGGTACCACATTCAATGAACGACCATATTTCAGACTATTTTCTAACATCTTA
+TACGAATGGGCTACCATCAGGACGCACAATTTTGTTAGAATATCTGATTCCAGCACTAGG
+CAGGAGCTGATCGAATTTGATTCTGTATTTTACAACACTTTCTCAGGATATTTGCACGCT
+CTGCAACCATTTGCCTTCCCTGGATTCTCATTTGCATGGGTGACACTATTATCACACAGA
+ATGTTATTACCAATTATGCTAAGATTACCCAATAAAATAGGTTGGGAAAAGTTAATGCTT
+TTGATTATCGATTTGTTTAAATTTTTGGACCAATACACAAGTAAACATGCAGTCTCTGAC
+GCTGTTTCGGTTGTTTATAAGGGAACACTGCGTGTTATTTTAGGCATTTCGAATGATATG
+CCATCCTTTTTGATTGAAAATCACTATGAATTAATGAACAATCTACCTCCAACATATTTC
+CAACTAAAGAATGTTATTTTATCTGCTATTCCTAAGAATATGACCGTTCCCAACCCATAT
+GACGTGGATCTTAATATGGAGGATATTCCAGCATGTAAAGAACTACCTGAAGTCTTCTTT
+GATCCTGTAATTGATTTACACTCATTGAAAAAGCCAGTTGACAACTACCTACGTATTCCC
+TCAAATTCATTATTAAGAACAATACTAAGCGCTATTTACAAGGATACCTATGACATAAAA
+AAGGGCGTAGGCTACGACTTTTTATCTGTTGATAGTAAATTAATTCGCGCTATTGTATTA
+CATGTGGGCATTGAAGCTGGAATAGAGTATAAGAGAACTTCTTCAAATGCGGTATTTAAT
+ACGAAGTCTTCTTATTATACTTTATTGTTCAATCTGATTCAAAATGGTAGCATCGAAATG
+AAATATCAAATTATTCTGTCTATTGTGGAACAATTGCGGTATCCAAACATCCACACCTAT
+TGGTTCAGCTTTGTGTTAATGAATATGTTCAAAAGTGACGAATGGAATGATCAAAAACTT
+GAAGTCCAAGAAATTATTTTAAGAAACTTTTTAAAAAGAATTATTGTTAACAAACCACAT
+ACCTGGGGTGTTTCAGTTTTCTTTACTCAGTTGATAAACAATAACGATATTAATCTTTTA
+GACCTGCCCTTTGTACAAAGTGTTCCCGAAATTAAACTAATTTTACAACAATTAGTAAAA
+TATTCCAAAAAATACACAACCAGTGAACAAGATGACCAATCCGCCACCATCAATAGAAGG
+CAAACCCCTCTACAATCCAACGCATAA
+>YCY4 1176 residues Pha 0 Code 0
+ATGGTTTCATTGTTCAAAAGAGGTAAGGCTCCACCGCTCACGAAAGAAGGCCCCACTTCT
+AAAAAGCCTCCTAACACAGCGTTTAGACAACAAAGGCTTAAGGCATGGCAACCAATACTG
+TCTCCTCAAAGTGTGCTTCCGTTGTTAATATTCGTTGCATGTATATTTACTCCTATTGGT
+ATTGGACTCATTGTAAGCGCTACTAAGGTACAAGATCTAACAATTGATTATAGTCATTGT
+GATACAAAAGCATCTACAACTGCTTTTGAAGATATACCAAAGAAGTACATTAAATATCAC
+TTTAAAAGTAAAGTTGAAAATAAACCACAATGGAGGCTAACCGAAAATGAAAATGGCGAA
+CAATCATGCGAACTGCAGTTCGAAATCCCAAACGATATCAAGAAATCCATTTTTATATAT
+TATAAAATAACCAATTTTTATCAAAATCATCGCAGATATGTCCAATCGTTTGACACAAAG
+CAAATATTAGGGGAGCCTATCAAAAAAGATGATCTGGATACAAGCTGTAGTCCAATAAGA
+AGTAGGGAAGACAAAATAATATATCCCTGTGGGTTGATCGCTAATTCCATGTTTAATGAT
+ACATTTTCTCAGGTGTTGAGTGGTATAGATGACACAGAAGACTATAATTTAACTAACAAG
+CATATATCATGGAGTATTGATCGTCACAGATTTAAAACCACCAAGTATAATGCTAGCGAT
+ATTGTTCCACCGCCAAACTGGATGAAGAAGTATCCCGATGGGTATACAGATGAAAATCTT
+CCTGATATCCATACTTGGGAAGAGTTCCAGGTATGGATGAGGACTGCAGCCTTTCCCAAG
+TTTTACAAGTTGACGTTGAAAAATGAATCTGCTTCTTTACCGAAGGGTAAATATCAAATG
+AACATTGAGTTGAATTATCCGATTTCACTCTTTGGTGGCACAAAATCATTTGTACTGACT
+ACAAATGGAGCTATTGGTGGTAGAAATATGTCACTAGGCGTACTGTACCTCATCGTTGCA
+GGGCTTTGCGCCTTATTTGGCATCATTTTTTTGGTTAAATTAATCTTCCAACCAAGAGCG
+ATGGGTGATCACACTTATTTGAATTTTGATGATGAAGAAAACGAGGATTATGAGGATGTA
+CACGCAGAGAATACAACATTGAGGGAAATTTTATAG
+>A2 360 residues Pha 0 Code 0
+ATGCGCAGCATAGAAAACGATAGAAGTAATTATCAACTTACACAGAAAAATAAATCGGCG
+GATGGGTTGGTATTTAATGTGGTAACTCAAGATATGATAAACAAAAGTACTAAACCTTAC
+AGAGGACACCGGTTTACAAAAGAAAATGTCCGAATACTAGAAAGTTGGTTTGCAAAGAAC
+ATCGAGAACCCATATCTAGATACCAAGGGCCTAGAGAATCTAATGAAGAATACCAGTTTA
+TCTCGCATTCAAATCAAAAACTGGGTTTCGAATAGAAGAAGAAAAGAAAAAACAATAACA
+ATCGCTCCAGAATTAGCGGACCTCTTGAGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA
+>GIT1 1557 residues Pha 0 Code 0
+ATGGAAGACAAAGATATCACATCGGTAAATGAGAAGGAAGTGAACGAGAACACTAATCCT
+AGAATAATAAAATATGATGCCGAGAGGCGTGCAACCCGTACTGAAACCTCAAAGAAAGAT
+AAATGGAAAAACATAGTTACAATCATTGCGTCCGGTTTTGCTCTGATAAGTGATGGTTAC
+GTAAATGGTTCAATGAGTATGCTAAACAAGGTTTTTGTTATGGAGTACGGTAAGAAAAAC
+TATAGCTCAAAAGTGTCGACTAGAGTTTCCAACGCAGCCCTAGTTGGTATTATTTTTGGC
+CAATTCTTTATGGGTATCGCTGCTGATTATTATAGTAGAAAATCTTGTATCCTTGTGGCC
+ACTGCTATCTTGGTTATTGGTAGTGCTCTGTGTGCTGCCTCTCACGGTACTACTGTACCT
+GGCATGTTTTGGATGTTAACAGTTATGAGAGGTTTGGTAGGTATTGGTGTTGGTGCAGAA
+TATCCTACCAGTACATTAAGTGCTAATGAGTCTGCTAATGAATATACCACTACCAAAAGA
+GGTGGTATCCTGGTTATGGTGACAAATTTGCCACTAGCCTTCGGTGGTCCATTTGCTACG
+ATCATCTTTTTAATCGTCTACAAAATCTGTTCAGGAACAAAACATTTAGAGGCGATCTGG
+AGGACTGTTTTTGCAATAGGGTGCTTCTGGCCATTGAGTGTGTTCTATTTTAGATGGAAG
+ACTGCTACTACAGAAGTCTATGAAAAAGGTAGAATCAAGAGAAATATACCATATTTCCTA
+GCATTGAAATTTTATTGGAAAAGGTTACTTGGTACATGTGGTACATGGTTTATGTATGAT
+TTTGTTACCTTCCCAAATGGTATTTTCAGTTCAACAATTATCAGTTCCGTTATCAAGGAC
+CAAAATGATTTAGTAAAAGTGGCAGAGTGGAACTTACTGTTGGGAGTTTTAGCTGTACTG
+GGTGTACCAATTGGTGCTTATCTGTCCGATCGTATTGGTCGTAAATATACGTTGATGTTT
+GGTTTCTCTGGGTACATCATCTTTGGTCTAATCATTGGATGTGCGTACGACCAATTGAAA
+AAAATCACCCCCTTGTTTATTATCTTCTACGCATTCATGAATATGTTAGGTAATGCTGGA
+CCAGGTGATATGCTTGGTGTTATTAGTAGTGAAGCGTCAGCAACCGCTGTTAGAGGTGTT
+TTCTATGGTTTATCTGCTGTGACTGGTAAAATCGGTTCTGTAGTAGGCGTCGAATGTTTC
+CAACCCATTAGGGATAATTTGGGTGCAAGATGGACTTTTATTATTGCTGCAATTTGTGGT
+CTTATTGGTATCATTATTACATATTTCTTTGTTCCACATTCTCTTGAAAGCGATTTAATG
+AAGCAAGACGTTGAATTTCACAACTATTTGGTATCCAATGGCTGGACTGGTAAGATGGGA
+TTTGATGAGACAGATGAAGAATCAATGGTTAGAACTATTGAAGTTGAAGAGAATGGTACT
+AATTGTAGTAAGAAAAACGCAGAAATAATTTCAGTCAGACAGGTCGATCAAAGTTGA
+>YCZ0 951 residues Pha 0 Code 0
+ATGTCATCTACGGACATCTGGATATCCAATGATGCATCTACTTTTCAAAAGGCACAGCTG
+CCTACTCAATTACGGCACGTCAAAGTGATTAAAATTCGTGAAGATTCTATCGGAAGGATC
+ATCCTTCTTATATCGACAGAAATCACAAATGAGGAAAATGCTGATCCAGATCTCTCAGAG
+ATTTTCATATCAGATTCGCAAGGGTTGAAATTCTCACCTGTTGAATGGACACCAAACCAT
+CAGTTTGGAAATTTTAGGCTCACTTTTCCTGATTTCTTGAAAGGGACAATATTTGGATCG
+TTTCATCCTTCCATTGACTATTCTAATCACCAAGTAAACTATACTGAAAATATAGCCGGA
+GGAGAAACCAAAATATCCGTTGATAACGGCCTCACATGGTCAAATTTGAAAGTTGTTGAT
+GAAGAAAATGCCGATTCGTTCGGCTGTGATATCACTAGGCCTGAGAGATGTTCACTTCAG
+GGTTATTTTTACAATCTAAAACTTTCAAATCCTTCTGCTGGGATCATATTAATGACAGGT
+TCTGTTGGCGATGACAATGAATTCGATCGGAAGGACCGAAAAACTTTCATTTCTAGAGAC
+GGTGGTCTAACATGGAGGGTGGCCCATAATTCTTCTGGATTATATGCTACTGGTGATCTG
+GGAAATATTATTGTATATATCCCGTCTCCTTCATATAAAGATGGTGATGTACAATCCAAA
+CTTTATTTTTCCTTGGACCAAGGTAGAACATGGAATCAATATGAGCTTGTTGACGCTTTA
+TTTTATATCCATCCATTAGAGTTGATTAATACAACGCCAGATGGATCAGGCTCAAAATTT
+ATTTTAAGCGGACATCTCATTACTACGGCTAGTCAAGAAGGAAACAACACCAACATCTCA
+TATATTGCAAGAAGTGTCCTGTATGCGATCGATTTTTCTGCTGCATTTTGA
+>YCZ1 549 residues Pha 0 Code 0
+ATGATATTACTTCATGCCATATATACTCTTTGGGTAATTATACTACTTCCGCTACTCAAT
+GCAGAGAAATTTGTCCCAAAAGTAACGGAGGCTCCTATAGAAACATCATTTAATCTAGTG
+AGTTTTGATGATTCCAACACTTCTATCAGATTAGATGGTTGGGGGGTTGTATGGATAAGT
+TTCGACGCTGGAGAAAATTGGGAAACGGTCAAAGAAATTGAAGAGCGCATTTTCAGATTT
+ACTGTTGATCCTTTCCATGGACAGGAAAGAGGTTTCGCTTTTATATGTGAATCACCCAAA
+TTCTACATTACCGACGACCGTGGGGAGTCATGGAGGGCTTTAACTATACCCTCATCAGAA
+GAATATTTAGATGGCGACTGTTTTATAACTACTCATCCTAGAAACAAAGAACTTCTTATT
+GCGAATTGCTATAGCTATATGATAGACGCAGACGTTTTATATGACCCAAGTGAAATTTAC
+TTGAGCAATGATGGGAATCCTTTTTTAAAATTAAACCTTCCTTGGAAAAGAAAAAAGACG
+ACGATATAA
+>YCZ2 1107 residues Pha 0 Code 0
+ATGAAGGCTGTCGTCATTGAAGACGGTAAAGCGGTTGTCAAAGAGGGCGTTCCCATTCCT
+GAATTGGAAGAAGGATTCGTATTGATTAAGACACTCGCTGTTGCTGGTAACCCGACTGAT
+TGGGCACACATTGACTACAAGGTCGGGCCTCAAGGATCTATTCTGGGATGTGACGCTGCC
+GGCCAAATTGTCAAATTGGGCCCAGCCGTCGATCCTAAAGACTTTTCTATTGGTGATTAT
+ATTTATGGGTTCATTCACGGATCTTCCGTAAGGTTTCCTTCCAATGGTGCTTTTGCTGAA
+TATTCTGCTATTTCAACTGTGGTTGCCTACAAATCACCCAATGAACTCAAATTTTTGGGT
+GAAGATGTTCTACCTGCCGGCCCTGTCAGGTCTTTGGAAGGGGCAGCCACTATCCCAGTG
+TCACTGACCACAGCTGGCTTGGTGTTGACCTATAACTTGGGCTTGAACCTGAAGTGGGAG
+CCATCAACCCCACAAAGAAACGGCCCCATCTTATTATGGGGCGGTGCAACTGCAGTAGGT
+CAGTCGCTCATCCAATTAGCCAATAAATTGAATGGCTTCACCAAGATCATTGTTGTGGCT
+TCTCGGAAACACGAAAAACTGTTGAAAGAATATGGTGCTGATCAACTATTTGATTACCAT
+GATATTGACGTGGTAGAACAAATTAAACACAAGTACAACAATATCTCGTATTTAGTCGAC
+TGTGTCGCGAATCAAAATACGCTTCAACAAGTGTACAAATGTGCGGCCGATAAACAGGAT
+GCTACCGTTGTCGAATTAACTAATTTGACAGAAGAAAACGTCAAAAAGGAGAATAGGAGG
+CAAAATGTCACTATTGACAGAACAAGACTGTATTCAATAGGCGGCCATGAAGTACCATTT
+GGTGGCATTACTTTCCCTGCTGACCCAGAAGCCAGGAGAGCTGCCACCGAATTCGTCAAG
+TTCATCAATCCAAAGATTAGTGATGGGCAAATTCACCATATTCCAGCAAGGGTCTATAAG
+AACGGGCTTTACGATGTTCCTCGTATCCTGGAAGACATTAAAATCGGTAAGAACTCTGGT
+GAAAAACTAGTTGCCGTATTAAACTAG
+>YCZ3 336 residues Pha 0 Code 0
+ATGGAGATGCTCTTGTTTCTGAACGAATCATACATCTTTCATAGGTTTCGTATGTGGAGT
+ATTGTTTTATGGCACTCATGTGTATTCGTATGCGCAGAATGTGGGAATGCCAATTATAGG
+GGTGCCGGGGTGCCTTGCAAAACCCTTTTACGCGCGCCTGTGAAGTTTCCGCTTTCGGTC
+AAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTTCAGAAGCTTATTGTCTAAGCCTA
+AATTCAGTCTGCTTTAAACGGCTTCCGCGGAAGAAATATTTCCATCTCTTGAATTCGTAC
+AACATTAAACGTGTGTTGGGAGTCGTATACTGTTAG
+>PAU3 375 residues Pha 0 Code 0
+ATGGTCAAATTAACTTCAATCGCTGCTGGTGTTGCCGCCATCGCTGCCGGTATTGCCGCT
+GCCCCAGCCACTACCACTCTATCTCCATCTGACGAAAGGGTCAACTTGGTCGAATTGGGT
+GTTTACGTCTCCGATATCAGAGCTCATTTGGCTCAATACTACTTGTTTCAAGCAGCTCAT
+CCAACTGAGACCTACCCAGTTGAGATTGCTGAAGCTGTTTTCAACTATGGTGACTTCACC
+ACTATGTTGACTGGTATTCCAGCTGAACAAGTCACCAGAGTCATCACTGGTGTCCCATGG
+TACTCCACTAGATTGAGACCAGCCATCTCCAGTGCTCTATCTAAGGACGGTATCTACACT
+GCTATTCCAAAATAG
+>YCZ5 1086 residues Pha 0 Code 0
+ATGCTTTACCCAGAAAAATTTCAGGGCATCGGTATTTCCAACGCAAAGGATTGGAAGCAT
+CCTAAATTAGTGAGTTTTGACCCAAAACCCTTTGGCGATCATGACGTTGATGTTGAAATT
+GAAGCCTGTGGTATCTGCGGATCTGATTTTCATATAGCCGTTGGTAATTGGGGTCCAGTC
+CCAGAAAATCAAATCCTTGGACATGAAATAATTGGCCGCGTGGTGAAGGTTGGATCCAAG
+TGCCACACTGGGGTAAAAATCGGTGACCGTGTTGGTGTTGGTGCCCAAGCCTTGGCGTGT
+TTTGAGTGTGAACGTTGCAAAAGTGACAACGAGCAATACTGTACCAATGACCACGTTTTG
+ACTATGTGGACTCCTTACAAGGACGGCTACATTTCACAAGGAGGCTTTGCCTCCCACGTG
+AGGCTTCATGAACACTTTGCTATTCAAATACCAGAAAATATTCCAAGTCCGCTAGCCGCT
+CCATTATTGTGTGGTGGTATTACAGTTTTCTCTCCACTACTAAGAAATGGCTGTGGTCCA
+GGTAAGAGGGTAGGTATTGTTGGCATCGGTGGTATTGGGCATATGGGGATTCTGTTGGCT
+AAAGCTATGGGAGCCGAGGTTTATGCGTTTTCGCGAGGCCACTCCAAGCGGGAGGATTCT
+ATGAAACTCGGTGCTGATCACTATATTGCTATGTTGGAGGATAAAGGCTGGACAGAACAA
+TACTCTAACGCTTTGGACCTTCTTGTCGTTTGCTCATCATCTTTGTCGAAAGTTAATTTT
+GACAGTATCGTTAAGATTATGAAGATTGGAGGCTCCATCGTTTCAATTGCTGCTCCTGAA
+GTTAATGAAAAGCTTGTTTTAAAACCGTTGGGCCTAATGGGAGTATCAATCTCAAGCAGT
+GCTATCGGATCTAGGAAGGAAATCGAACAACTATTGAAATTAGTTTCCGAAAAGAATGTC
+AAAATATGGGTGGAAAAACTTCCGATCAGCGAAGAAGGCGTCAGCCATGCCTTTACAAGG
+ATGGAAAGCGGAGACGTCAAATACAGATTTACTTTGGTCGATTATGATAAGAAATTCCAT
+AAATAG
+>YCZ6 2499 residues Pha 0 Code 0
+ATGGATTCGATTACAGTAAAAAAACCTCGGTTAAGATTGGTTTGCCTGCAATGCAAAAAG
+ATCAAACGGAAATGTGATAAACTGCGGCCTGCTTGCTCGCGATGCCAACAAAATTCATTA
+CAGTGTGAATATGAAGAGAGAACAGATTTATCTGCCAATGTTGCAGCAAACGACTCTGAT
+GGATTCAATTCCTCTCATAAGCTCAATTTCGAACAGCAACCTGTACTTGAAAGGACTGGG
+CTTAGATATTCCTTACAAGTGCCTGAAGGTGTCGTTAATGCTACGCTGTCGATATGGAAC
+GCCGAAGATATGCTAGTTATAGTAGGATTAGTTACATTTCTGGATTATCCTTTTGCTGCG
+CATAGTCTGGCGCAACATGACCAGTATATCAGGGCACTTTGTGCTTCGTTGTACGGCATG
+GCGCTTGTTGACTTTAGCAATTATGCTAATGGTATTCCTTGTGAAGACACATCAAGAAGT
+ATACTAGGACCATTGTCATTCATAGAAAAGGCCATTTTTAGACGGATAGAACATAGTAAG
+CAATTTCGAGTTCAGTCTGCCGCCTTAGGGTTATTATACAATGCATTTTCAATGGAAGAA
+GAAAACTTCTCGACTCTTCTACCGTCACTCATCGCTGAAGTGGAAGACGTGTTGATGCAA
+AAAAAAGACTGTGAAATACTTTTGAGGTGTTTCTATCAAAATATTTATCCCTTCTATCCT
+TTTATGGACATTTCACTCTTTGAGAGCGATCTCACTAGTTTGCTTTTACAAGACGACAAT
+AATCGTTGGAAAATTAGTACTGAAGTTAAAAATGTGCGCAAAAAAATAGAAACTTTGTCA
+TTACTTACAATAGTAATGGCCATGGCCTTGATGCATTCAAAATTGGATGCAAATCTTCTT
+TCAATGGTAAAAGAAAATGCCTCCGAAAGTGCCAGGAAACTTTCTCTTTTATGTCATAAA
+CTATTATGCCTCCTGGATGTATTTCGCTATCCAAATGAGAACACTTTTACTTGCCTTTTA
+TATTTCTACGTTTCAGAGCATTTAGATCCCGAGAGTCCCGATTGTGTACTGAGCCCCACT
+AACTTGCTTACTCTGCACCATCTTTTAAATTTGTCCATGACCTTAGGTCTTCAATATGAG
+CCTTCGAAGTACAAACGTTTCAAAGATCCAGAAGTGATAAGGCAGAGACGGATATTATGG
+TTAGGAGTTCAGTCATTACTTTTTCAAATTTCTCTTGCTGAAGGTGATGCTGGTAAATCA
+AATAGTGAATATATGGAGGCATATTTAACAGACTTCGAAGAATATATTGAAGCTTCCTCA
+GAGTATGAAAAAAGTTCTGCGAGTGAATCGAACGTGCAAATGAATGATATTGTTTGGAAT
+AAGTACAAATTTCACGTCATTTTGAGTAAACTAATGTCTGATTGCACTTCAGTTATACAA
+CATCCGCAGCTTTTCCACATTTTAGGAAATATTAAAAGATCTGAAGATTTTATGGCTGAG
+AACTTTCCTACAAGTTCGATTTACCAACCCCTTCATGAAAAGGAACCAAATGCGATCAAA
+GTTGGCAAAAGTACGGTTCTCGATGTCATGGATATTCAAAAAACTGAAATATTTCTTACA
+AATATTGTGGGAAGTATGTGTTTTTTAAACATTTTTGATGTCCTATCGTTACATTTTGAA
+AAAAAATGTGTTATGCACTGGGAAGAATATGAAAAGAACTATCATTTCCTTACTTTGAAA
+AGTTTCAATGCATACTTAAAGCTAGCAGGGTTGATATCTGATTATCTCGAGAATAAGTTT
+CAAGGGAACATTTTAGAGAGTCGCGGTTATATCATAGATAAACAAATATGTTTTATGCTT
+GTAAGGATCTGGATGTTCCAATGTCGTATTTTGTTAAGGTTTTCATACAAGCAAGAAAGT
+CAGAAAAAATTGGCCTCTTCCAGTATATCCACTAACGATAATGAAAAAGAAGATGAAATG
+ATTGTCATTTTAGAAAGACTTATTAAACACATTCGTAACCAAATGGCACATTTAGTGGAT
+CTAGCAAAGGGAAAACTTCAAGATAGTTACTTTGGTGCTTACCAAACTGTTCCCATGTTT
+AGATACGTTGTGTATTTGATCGATGTTGGCGGCTTAGTATCTGTGACAAATGGGTTTTGG
+GATAAGATTTCCAGTGATGGTGAAATACCGCCAAAAGTACAACAAGCCGTGAGATTGAAA
+TGGGGATTGGACTGCAATAATTCGAGAAGAATCAAACAAAAGTTAATAAGCAGCCAGAGT
+TTGCAGAGTTTCAATCAAGTTCTGTTGTGCCAGATGGAGGATGCAGTTCTCTCCAGTTCC
+TTCGCAATAAAAGCCAATACCGCTATGTCCCAAAACACGGCTGAAGAATTTTTCAATATC
+AGCGAAGAAGAGGCTTTAAATCAACTATTGGAAAACAACAATTTTGATGCCTTCTGGGAT
+TTATTAGGTGAAAATCTGAGCGATATGCCTTCTTTGTGA
+>YCZ7 1092 residues Pha 0 Code 0
+ATGATTGGGTCCGCGTCCGACTCATCTAGCAAGTTAGGACGCCTCCGATTTCTTTCTGAA
+ACTGCCGCTATTAAAGTATCCCCGTTAATCCTAGGAGAAGTCTCATACGATGGAGCTCGT
+TCGGATTTTCTCAAATCAATGAACAAGAATCGAGCTTTTGAATTGCTTGATACTTTTTAC
+GAGGCAGGTGGAAATTTCATTGATGCCGCAAACAACTGCCAAAACGAGCAATCAGAAGAA
+TGGATTGGTGAATGGATACAGTCCAGAAGGTTACGTGATCAAATTGTCATTGCAACCAAG
+TTTATAAAAAGCGATAAAAAGTATAAAGCAGGTGAAAGTAACACTGCCAACTACTGTGGT
+AATCACAAGCGTAGTTTACATGTGAGTGTGAGGGATTCTCTCCGCAAATTGCAAACTGAT
+TGGATTGATATACTTTACGTTCACTGGTGGGATTATATGAGTTCAATCGAAGAATTTATG
+GATAGTTTGCATATTCTGGTCCAGCAGGGCAAGGTCCTCTATTTGGGTGTATCTGATACA
+CCTGCTTGGGTTGTTTCTGCGGCAAACTACTACGCTACATCTTATGGTAAAACTCCCTTT
+AGTATCTACCAAGGTAAATGGAACGTGTTGAACAGAGATTTTGAGCGTGATATTATTCCA
+ATGGCTAGGCATTTCGGTATGGCCCTCGCCCCATGGGATGTCATGGGAGGTGGAAGATTT
+CAGAGTAAAAAAGCAATGGAGGAACGGAGGAAGAATGGAGAGGGTATTCGTTCTTTCGTT
+GGCGCCTCCGAACAAACAGATGCAGAAATCAAGATTAGTGAAGCATTGGCCAAGATTGCT
+GAGGAACATGGCACTGAGTCTGTTACTGCTATTGCTATTGCCTATGTTCGCTCTAAGGCG
+AAAAATTTTTTTCCGTCGGTTGAAGGAGGAAAAATTGAGGATCTCAAAGAGAACATTAAG
+GCTCTCAGTATCGATCTAACGCCAGACAATATAAAATACTTAGAAAGTATAGTTCCTTTT
+GACATCGGATTTCCTAATAATTTTATCGTGTTAAATTCCTTGACTCAAAAATATGGTACG
+AATAATGTTTAG
diff --git a/inst/sequences/input.out b/inst/sequences/input.out
new file mode 100644
index 0000000..373b086
--- /dev/null
+++ b/inst/sequences/input.out
@@ -0,0 +1,112 @@
+title GC3s GC
+YCG9_Probable__________13 0.335 0.394
+YCG8________573_residues_ 0.439 0.446
+ALPHA2________633_residue 0.328 0.351
+ALPHA1________528_residue 0.345 0.379
+CHA1_________1083_residue 0.328 0.394
+KRR1__________951_residue 0.364 0.384
+PRD1_________2139_residue 0.430 0.397
+KAR4_________1008_residue 0.354 0.383
+PBN1_________1251_residue 0.330 0.386
+LRE1_________1761_residue 0.347 0.419
+APA1__________966_residue 0.385 0.395
+YCE9__________939_residue 0.410 0.433
+YCE8_________1392_residue 0.396 0.370
+YCE7__________777_residue 0.394 0.391
+YCE5_________2283_residue 0.383 0.386
+YCE6__________324_residue 0.400 0.417
+YCE4_________1254_residue 0.468 0.453
+PDI1_________1569_residue 0.556 0.474
+GLK1_________1503_residue 0.581 0.497
+YCD8_________1587_residue 0.353 0.395
+SRO9_________1401_residue 0.424 0.448
+YCD6_________1701_residue 0.528 0.474
+YCD5__________333_residue 0.472 0.421
+YCD3__________507_residue 0.560 0.490
+STE50________1041_residue 0.467 0.438
+HIS4_________2400_residue 0.370 0.425
+BIK1_________1323_residue 0.438 0.438
+FUS1_________1539_residue 0.390 0.411
+YC08__________579_residue 0.390 0.444
+AGP1_________1902_residue 0.396 0.438
+LEU2_________1095_residue 0.382 0.441
+NFS1_________1494_residue 0.356 0.431
+BUD3_________4104_residue 0.358 0.380
+GBP2_________1284_residue 0.329 0.426
+ILV6__________930_residue 0.555 0.501
+CWH36_________393_residue 0.341 0.382
+PEL1_________1251_residue 0.387 0.390
+RER1__________567_residue 0.339 0.346
+CDC10_________969_residue 0.294 0.371
+MRPL32________552_residue 0.299 0.375
+YCP4__________744_residue 0.358 0.451
+CIT2_________1383_residue 0.285 0.398
+YCP7__________720_residue 0.333 0.363
+SAT4_________1812_residue 0.291 0.392
+RVS161________798_residue 0.330 0.384
+YCQ0__________852_residue 0.348 0.422
+ADP1_________3150_residue 0.343 0.380
+PGK1_________1251_residue 0.498 0.462
+POL4_________1749_residue 0.323 0.358
+YCQ7_________2862_residue 0.326 0.396
+SRD1__________678_residue 0.295 0.370
+MAK32________1092_residue 0.385 0.407
+PET18_________648_residue 0.357 0.389
+MAK31_________267_residue 0.321 0.394
+HSP30_________999_residue 0.377 0.429
+YCR3_________1836_residue 0.288 0.380
+SYN_________1479_residues 0.307 0.373
+YCR6_________2232_residue 0.370 0.405
+GNS1_________630_residues 0.332 0.399
+FEN2_________1539_residue 0.334 0.401
+RIM1__________444_residue 0.343 0.395
+CRY1__________414_residue 0.306 0.460
+YCS2________6504_residues 0.336 0.350
+YCS3________3681_residues 0.302 0.394
+GNS1_________1044_residue 0.349 0.383
+RBK1_________1002_residue 0.333 0.396
+PHO87________2772_residue 0.341 0.386
+BUD5_________1617_residue 0.394 0.398
+MATALPHA2_________633_res 0.328 0.351
+MATALPHA1_________528_res 0.345 0.379
+TSM1_________4224_residue 0.331 0.368
+YCT5________1476_residues 0.560 0.479
+PETCR46_______510_residue 0.695 0.525
+YCT7________828_residues_ 0.672 0.527
+YCT9_________447_residues 0.615 0.547
+ARE1_________1833_residue 0.698 0.499
+RSC6_________1452_residue 0.506 0.459
+THR4_________1545_residue 0.402 0.405
+CTR86________1692_residue 0.356 0.361
+PWP2_________2772_residue 0.391 0.414
+YCU9_________777_residues 0.608 0.532
+YCV1________1752_residues 0.533 0.499
+G10_________474_residues_ 0.566 0.482
+HCM1_________1599_residue 0.411 0.423
+RAD18________1464_residue 0.300 0.379
+CYPR_________957_residues 0.458 0.435
+YCW1________366_residues_ 0.419 0.419
+YCW2________1548_residues 0.364 0.434
+SSK22________3945_residue 0.373 0.393
+SOL2__________948_residue 0.516 0.495
+ERS1__________783_residue 0.492 0.433
+PAT1_______2394_residues_ 0.417 0.435
+SRB8_________4284_residue 0.390 0.352
+YCX3_________384_residues 0.442 0.417
+TUP1_________2142_residue 0.421 0.456
+YC16________462_residues_ 0.649 0.455
+ABP1_________1779_residue 0.397 0.459
+KIN82________2181_residue 0.374 0.399
+MSH3_________3144_residue 0.333 0.366
+CDC39________6327_residue 0.316 0.365
+YCY4________1176_residues 0.312 0.368
+A2____________360_residue 0.342 0.373
+GIT1_________1557_residue 0.295 0.383
+YCZ0_________951_residues 0.298 0.383
+YCZ1________549_residues_ 0.282 0.372
+YCZ2________1107_residues 0.418 0.443
+YCZ3________336_residues_ 0.443 0.423
+PAU3__________375_residue 0.413 0.478
+YCZ5________1086_residues 0.378 0.437
+YCZ6_______2499_residues_ 0.321 0.369
+YCZ7_______1092_residues_ 0.349 0.410
diff --git a/inst/sequences/legacy.fasta b/inst/sequences/legacy.fasta
new file mode 100644
index 0000000..966cb2a
--- /dev/null
+++ b/inst/sequences/legacy.fasta
@@ -0,0 +1,30 @@
+>LEGACY 921 bp
+;
+; Example of a FASTA file using comment lines starting with a semicolon
+; as allowed in the original FASTA program:
+;
+; if (line[0]!='>'&& line[0]!=';') {
+; for (i=l_offset; (n<maxs && rn < sstop)&&
+; ((ic=qascii[line[i]&AAMASK])<EL); i++)
+; if (ic<NA && ++rn > sstart) seq[n++]= ic;
+; if (ic == ES || rn > sstop) break;
+; }
+;
+; From file getseq.c in FASTA program version 35.2.5
+;
+ATGAAAATGAATAAAAGTCTCATCGTCCTCTGTTTATCAGCAGGGTTACTGGCAAGCGCG
+CCTGGAATTAGCCTTGCCGATGTTAACTACGTACCGCAAAACACCAGCGACGCGCCAGCC
+ATTCCATCTGCTGCGCTGCAACAACTCACCTGGACACCGGTCGATCAATCTAAAACCCAG
+ACCACCCAACTGGCGACCGGCGGCCAACAACTGAACGTTCCCGGCATCAGTGGTCCGGTT
+GCTGCGTACAGCGTCCCGGCAAACATTGGCGAACTGACCCTGACGCTGACCAGCGAAGTG
+AACAAACAAACCAGCGTTTTTGCGCCGAACGTGCTGATTCTTGATCAGAACATGACCCCA
+TCAGCCTTCTTCCCCAGCAGTTATTTCACCTACCAGGAACCAGGCGTGATGAGTGCAGAT
+CGGCTGGAAGGCGTTATGCGCCTGACACCGGCGTTGGGGCAGCAAAAACTTTATGTTCTG
+GTCTTTACCACGGAAAAAGATCTCCAGCAGACGACCCAACTGCTCGACCCGGCTAAAGCC
+TATGCCAAGGGCGTCGGTAACTCGATCCCGGATATCCCCGATCCGGTTGCTCGTCATACC
+ACCGATGGCTTACTGAAACTGAAAGTGAAAACGAACTCCAGCTCCAGCGTGTTGGTAGGA
+CCCTTATTTGGTTCCTCCGCTCCAGCTCCGGTTACGGTAGGTAACACGGCGGCACCAGCT
+GTGGCTGCACCCGCTCCGGCACCGGTGAAGAAAAGCGAGCCGATGCTCAACGACACGGAA
+AGTTATTTTAATACCGCGATCAAAAACGCTGTCGCGAAAGGTGATGTTGATAAGGCGTTA
+AAACTGCTTGATGAAGCTGAACGCTTGGGATCGACATCTGCCCGTTCCACCTTTATCAGC
+AGTGTAAAAGGCAAGGGGTAA
diff --git a/inst/sequences/louse.fasta b/inst/sequences/louse.fasta
new file mode 100644
index 0000000..0744e56
--- /dev/null
+++ b/inst/sequences/louse.fasta
@@ -0,0 +1,57 @@
+>gi|548117|gb|L32667.1|GYDCYTOXIB Geomydoecus chapini mitochondrial cytochrome oxidase I gene, partial cds
+CGAGGTATATATTTTAATTTTACCAGGTTTCGGTTTAATTTCCCAGATTATTCTATTCGAAAGGGGAAAG
+AAACAGGTTTTTGGAACTGTCGGGATGATTTATGCTATAATGGCTATTGGTATTCTTGGGTTTGTCGTTT
+GGGCGCATCACATGTTTACCGTAGGGATGGATGTTGACAGTCGAGCATATTTTACTAGAGCTACAATGGT
+TATTGCTGTTCCGACAGGAGTAAAAGTATTCAGTTGATTAGCTACTTCATTTGGAAGTCGGATTTCGTAT
+TCCGTTTCTATTTTGTGAACCCTTGGGTTCGTATTTTTATTTACGGTTGGAGGAATGACAGGATTAGTTT
+TGGCTAATTCGTGTGTTGATGTGGTCCTA
+>gi|548119|gb|L32668.1|GYDCYTOXIC Geomydoecus cherriei mitochondrial cytochrome oxidase I gene, partial cds
+AGAAGTCTATATTTTGATTTTACCCGGATTTGGTTTAATTTCCCAAATTATTTTATTCGAAAGAGGGAAA
+AAGCAGGTTTTTGGTACCGTGGGTATAATTTATGCGATAATAGCAATCGGAGTATTGGGTTTCGTGGTAT
+GGGCTCATCACATGTTTACCGTTGGAATGGATGTAGATAGCCGAGCATACTTCACAAGAGCAACGATAGT
+AATTGCTGTTCCAACCGGGGTGAAAGTATTTAGATGATTAGCCACTTCGTTCGGGAGTCGAGTCTCTTAT
+TCCGTCTCTATACTTTGAACTTTGGGGTTCGTATTCTTGTTTACGGTAGGAGGAATAACAGGGTTAGTTT
+TAGCAAACTCATGTGTGGACGTAGTCCTA
+>gi|548121|gb|L32669.1|GYDCYTOXID Geomydoecus costaricensis mitochondrial cytochrome oxidase I gene, partial cds
+CGAAGTTTATATTTTGATTTTACCCGGATTTGGTTTGATTTCTCAGATTATTTTATTTGAAAGAGGAAAG
+AAGCAGGTTTTTGGAACTGTAGGGATAATTTATGCAATAATAGCAATCGGAGTACTAGGTTTCGTGGTAT
+GGGCACACCACATGTTTACCGTTGGGATGGATGTTGACAGTCGAGCGTATTTCACAAGAGCAACGATAGT
+AATTGCTGTTCCAACTGGGGTAAAGGTATTTAGATGACTAGCCACTTCGTTTGGAAGCCGGGTCTCTTAT
+TCCGTTTCCATGTTATGAACTCTAGGATTTGTATTCTTATTTACGGTAGGAGGAATAACAGGGTTGGTTT
+TAGCAAATTCTTGTGTGGATGTAGTCCTA
+>gi|548125|gb|L32671.1|GYDCYTOXIF Geomydoecus ewingi mitochondrial cytochrome oxidase I gene, partial cds
+TGAAGTTTATATTCTGATTCTTCCAGGATTTGGGTTAATCTCCCACATTGTCCTGTTAGAAAGAGGAAAG
+AAACAAGTCTTTGGCACTGTCGGAATAATTTATGCAATAATAGCAATTGGGGTCCTTGGTTTTGTAGTTT
+GAGCCCATCACATGTTTACTGTGGGAATGGATGTTGATAGCCGAGCATATTTTACTAGAGCGACTATAGT
+AATTGCGGTGCCTACCGGAGTAAAGGTCTTTAGGTGGCTTGCTACTTCTTTTGGAAGTCGGATTTCGTAC
+TCCGTATCGATGCTATGGTCGTTTGGCTTTATTTTTTTATTTACTATCGGCGGCATGACAGGATTAGTTT
+TAGCAAATTCTTGTGTAGATGTGGTATTA
+>gi|548127|gb|L32672.1|GYDCYTOXIG Geomydoecus geomydis mitochondrial cytochrome oxidase I gene, partial cds
+AGAAGTATACATTCTAATCCTTCCGGAATTTGGTTTAATTTCTCATATTGTATTATTGGAAAGGGGAAAA
+AAGCAGGTTTTTGGTACTGTTGGGATAATTTACGCAATAATAGCAATTGGAGTTCTTGGTTTTGTAGTCT
+GAGCTCACCACATATTTACTGTAGGAATGGATGTTGATAGCCGGGCATATTTTACTAGGGCGACTATGGT
+AATTGCGGTACCTACTGGGGTAAAGGTTTTTAGATGATTGGCCACTTCTTTCGGGAGGCGAATTTCATAC
+TCCGTTTCGACACTGTGGGCATTTGGTTTCATTTTCCTATTTACTATTGGTGGGATAACAGGTTTAGTTT
+TAGCAAATTCATGTGTGGATGTGGTCCTA
+>gi|548131|gb|L32675.1|GYDCYTOXII Geomydoecus oklahomensis mitochondrial cytochrome oxidase I gene, partial cds
+AGAAGTATACATTCTAATTCTTCCGGGATTCGGGTTAATCTCTCATATTGTATTATTGGAAAGAGGAAAA
+AAGCAGGTTTTTGGTACTGTTGGGATAATTTACGCAATAATAGCAATTGGAGTTCTTGGTTTTGTAGTCT
+GAGCTCACCATATATTTACTGTAGGAATAGATGTTGATAGCCGGGCATACTTTACTAGGGCGACTATGGT
+AATTGCGGTGCCTACTGGAGTAAAGGTTTTTAGGTGATTGGCCACCTCTTTCGGGAGGCGAATTTCATAT
+TCCGTTTCGACACTGTGGGCATTTGGTTTTATTTTTCTATTTACTATTGGTGGGATAACAGGTTTAGTTC
+TAGCAAATTCATGTGTAGATGTGGTCCTA
+>gi|548133|gb|L32676.1|GYDCYTOXIJ Geomydoecus panamensis mitochondrial cytochrome oxidase I gene, partial cds
+AGAAGTTTATATTTTGATTTTACCTAGATTTGGTTTGATTTCTCAAATTATTCTATTCGAAAGAGGAAAG
+AAACAGGTCTTTGGAACCGTGGGAATAATTTATGCAATAATAGCGATTGGAGTTCTTGGATTTGTGGTTT
+GGGCTCATCACATATTTACTGTTGGGATAGATGTTGATAGACGAGCATATTTTACGAGAGCTACGATAGT
+AATTGCCGTTCCAACAGGGGTAAAGGTGTTTAGGTGATTGGCCACTTCTTTTGGAAGCCGAATTTCATAT
+TCGGTTTCGATGCTCTGAACATTAGGATTCGTATTTCTGTTCACGGTTGGAGGGATAACGGGGTTAGTTC
+TAGCCAATTCGTGTGTAGATGTAGTCCTA
+>gi|548137|gb|L32678.1|GYDCYTOXIL Geomydoecus setzeri mitochondrial cytochrome oxidase I gene, partial cds
+CGAAGTTTATATTTTGATTTTACCAGGTTTTGGGTTAATTTCTCAAATTATTTTATTCGAGAGAGGAAAG
+AAGCAAGTTTTTGGAACTGTAGGAATAATCTATGCTATAATAGCAATTGGAGTTCTTGGATTCGTGGTTT
+GAGCTCATCACATATTCACGGTTGGGATGGATGTGGATAGCCGGGCATATTTTACAAGAGCTACGATAGT
+AATTGCTGTTCCTACTGGAGTTAAAGTGTTTAGATGATTAGCCACTTCTTTTGGAAGCCGAATTTCATAT
+TCGGTTTCGATACTTTGAACGCTGGGATTCGTATTCCTATTCACCGTGGGAGGGATGACAGGATTGGTGT
+TAGCTAATTCGTGCGTAGATGTGGTCCTA
+
diff --git a/inst/sequences/louse.names b/inst/sequences/louse.names
new file mode 100644
index 0000000..994a4e9
--- /dev/null
+++ b/inst/sequences/louse.names
@@ -0,0 +1,8 @@
+G.chapini
+G.cherriei
+G.costaric
+G.ewingi
+G.geomydis
+G.oklahome
+G.panamens
+G.setzeri
diff --git a/inst/sequences/malM.fasta b/inst/sequences/malM.fasta
new file mode 100644
index 0000000..69cb33d
--- /dev/null
+++ b/inst/sequences/malM.fasta
@@ -0,0 +1,17 @@
+>XYLEECOM.MALM 921 bp ACCESSION E00218, X04477
+ATGAAAATGAATAAAAGTCTCATCGTCCTCTGTTTATCAGCAGGGTTACTGGCAAGCGCG
+CCTGGAATTAGCCTTGCCGATGTTAACTACGTACCGCAAAACACCAGCGACGCGCCAGCC
+ATTCCATCTGCTGCGCTGCAACAACTCACCTGGACACCGGTCGATCAATCTAAAACCCAG
+ACCACCCAACTGGCGACCGGCGGCCAACAACTGAACGTTCCCGGCATCAGTGGTCCGGTT
+GCTGCGTACAGCGTCCCGGCAAACATTGGCGAACTGACCCTGACGCTGACCAGCGAAGTG
+AACAAACAAACCAGCGTTTTTGCGCCGAACGTGCTGATTCTTGATCAGAACATGACCCCA
+TCAGCCTTCTTCCCCAGCAGTTATTTCACCTACCAGGAACCAGGCGTGATGAGTGCAGAT
+CGGCTGGAAGGCGTTATGCGCCTGACACCGGCGTTGGGGCAGCAAAAACTTTATGTTCTG
+GTCTTTACCACGGAAAAAGATCTCCAGCAGACGACCCAACTGCTCGACCCGGCTAAAGCC
+TATGCCAAGGGCGTCGGTAACTCGATCCCGGATATCCCCGATCCGGTTGCTCGTCATACC
+ACCGATGGCTTACTGAAACTGAAAGTGAAAACGAACTCCAGCTCCAGCGTGTTGGTAGGA
+CCCTTATTTGGTTCCTCCGCTCCAGCTCCGGTTACGGTAGGTAACACGGCGGCACCAGCT
+GTGGCTGCACCCGCTCCGGCACCGGTGAAGAAAAGCGAGCCGATGCTCAACGACACGGAA
+AGTTATTTTAATACCGCGATCAAAAACGCTGTCGCGAAAGGTGATGTTGATAAGGCGTTA
+AAACTGCTTGATGAAGCTGAACGCTTGGGATCGACATCTGCCCGTTCCACCTTTATCAGC
+AGTGTAAAAGGCAAGGGGTAA
diff --git a/inst/sequences/ortho.fasta b/inst/sequences/ortho.fasta
new file mode 100644
index 0000000..bba6b65
--- /dev/null
+++ b/inst/sequences/ortho.fasta
@@ -0,0 +1,30 @@
+>AK002358.PE1 501 residues
+ATGGCTCAGCGGCTCCTCCTGGGGAGGTTCCTGACCTCAGTCATCTCCAGGAAGCCTCCT
+CAGGGTGTGTGGGCTTCCCTCACCTCTAAGACCCTGCAGACCCCTCAGTACAATGCTGGT
+GGTCTAACAGTAATGCCCAGCCCAGCCCGGACAGTACACACCACCAGAGTCTGTTTGACG
+ACCTTTAACGTCCAGGATGGACCTGACTTTCAAGACAGAGTTGTCAACAGTGAGACACCA
+GTTGTTGTGGACTTTCATGCACAGTGGTGTGGCCCCTGCAAGATCCTAGGACCGCGGCTA
+GAGAAGATGGTCGCCAAGCAGCACGGGAAGGTGGTCATGGCCAAAGTGGACATTGACGAT
+CACACAGACCTTGCCATTGAATATGAGGTGTCAGCTGTGCCTACCGTGCTAGCCATCAAG
+AACGGGGACGTGGTGGACAAGTTTGTGGGGATCAAGGACGAGGACCAGCTAGAAGCCTTC
+CTGAAGAAGCTGATTGGCTGA
+>HSU78678.PE1 501 residues
+ATGGCTCAGCGACTTCTTCTGAGGAGGTTCCTGGCCTCTGTCATCTCCAGGAAGCCCTCT
+CAGGGTCAGTGGCCACCCCTCACTTCCAAAGCCCTGCAGACCCCACAATGCAGTCCTGGT
+GGCCTGACTGTAACACCCAACCCAGCCCGGACAATATACACCACGAGGATCTCCTTGACA
+ACCTTTAATATCCAGGATGGACCTGACTTTCAAGACCGAGTGGTCAACAGTGAGACACCA
+GTGGTTGTGGATTTCCACGCACAGTGGTGTGGACCCTGCAAGATCCTGGGGCCGAGGTTA
+GAGAAGATGGTGGCCAAGCAGCACGGGAAGGTGGTGATGGCCAAGGTGGATATTGATGAC
+CACACAGACCTCGCCATTGAGTATGAGGTGTCAGCGGTGCCCACTGTGCTGGCCATGAAG
+AATGGGGACGTGGTGGACAAGTTTGTGGGCATCAAGGATGAGGATCAGTTGGAGGCCTTC
+CTGAAGAAGCTGATTGGCTGA
+>RNU73525.PE1 501 residues
+ATGGCTCAGCGGCTTCTCCTGAGGAGGTTCCTGACCTCAGTCATCTCCAGGAAGCCTCCT
+CAGGGTGTGTGGGCTTCCCTCACCTCTACGAGCCTGCAGACCCCTCCGTACAATGCTGGT
+GGTCTAACTGGAACACCCAGCCCTGCCCGGACATTTCACACCACCAGAGTCTGTTCAACA
+ACCTTTAACGTCCAGGATGGACCTGACTTTCAAGACAGAGTTGTCAACAGTGAGACACCA
+GTTGTCGTGGACTTTCATGCACAGTGGTGTGGCCCCTGCAAGATCCTAGGACCTCGGTTA
+GAGAAGATGGTAGCCAAACAGCACGGGAAGGTGGTGATGGCCAAAGTGGACATTGACGAT
+CACACAGACCTTGCCATTGAGTACGAGGTGTCTGCTGTGCCTACCGTGCTGGCCATCAAG
+AACGGGGACGTGGTGGACAAGTTTGTGGGGATCAAGGACGAAGACCAGCTGGAAGCCTTC
+CTGAAGAAGCTAATTGGCTGA
diff --git a/inst/sequences/scuco.txt b/inst/sequences/scuco.txt
new file mode 100644
index 0000000..d375937
--- /dev/null
+++ b/inst/sequences/scuco.txt
@@ -0,0 +1,112 @@
+title T3s C3s A3s G3s CAI CBI Fop Nc GC3s GC L_sym L_aa Gravy Aromo
+YCG9_Probable__________13 0.4337 0.2347 0.3588 0.1852 0.123 0.075 0.446 54.09 0.335 0.394 439 458 0.610699 0.122271
+YCG8________573_residues_ 0.2876 0.3595 0.4222 0.1875 0.100 0.020 0.394 52.46 0.439 0.446 180 190 -0.211579 0.084211
+ALPHA2________633_residue 0.3636 0.2273 0.4939 0.2177 0.109 -0.034 0.397 58.73 0.328 0.351 204 210 -0.667143 0.052381
+ALPHA1________528_residue 0.4361 0.2180 0.4228 0.2589 0.112 -0.008 0.411 58.29 0.345 0.379 168 175 -0.522857 0.148571
+CHA1_________1083_residue 0.4399 0.2337 0.3927 0.1958 0.156 0.119 0.490 50.30 0.328 0.394 351 360 -0.046667 0.075000
+KRR1__________951_residue 0.4045 0.2409 0.4310 0.2591 0.215 0.197 0.530 45.99 0.364 0.384 302 316 -0.762658 0.079114
+PRD1_________2139_residue 0.4180 0.2757 0.3244 0.3169 0.190 0.131 0.493 53.19 0.430 0.397 684 712 -0.481742 0.117978
+KAR4_________1008_residue 0.3931 0.2328 0.4412 0.2512 0.167 0.135 0.500 50.47 0.354 0.383 322 335 -0.554030 0.089552
+PBN1_________1251_residue 0.4102 0.2156 0.4419 0.2210 0.132 0.012 0.414 53.53 0.330 0.386 403 416 -0.462260 0.098558
+LRE1_________1761_residue 0.4072 0.2278 0.4202 0.2250 0.122 0.025 0.412 52.23 0.347 0.419 570 586 -0.848123 0.061433
+APA1__________966_residue 0.3975 0.3156 0.4043 0.1972 0.335 0.373 0.631 42.55 0.385 0.395 309 321 -0.439252 0.093458
+YCE9__________939_residue 0.4041 0.3061 0.3440 0.2244 0.143 0.099 0.475 58.49 0.410 0.433 295 312 -0.452885 0.125000
+YCE8_________1392_residue 0.3850 0.2460 0.3835 0.3014 0.168 0.139 0.500 49.11 0.396 0.370 454 463 -0.123974 0.073434
+YCE7__________777_residue 0.3861 0.2277 0.4022 0.3118 0.112 -0.041 0.378 58.22 0.394 0.391 251 258 -0.201938 0.104651
+YCE5_________2283_residue 0.4197 0.2705 0.3664 0.2416 0.165 0.064 0.460 53.53 0.383 0.386 732 760 -0.225000 0.102632
+YCE6__________324_residue 0.3733 0.1867 0.3810 0.3291 0.120 0.105 0.480 61.00 0.400 0.417 100 107 -0.404673 0.074766
+YCE4_________1254_residue 0.3441 0.3765 0.3099 0.2098 0.130 0.119 0.468 54.53 0.468 0.453 402 417 -0.442686 0.083933
+PDI1_________1569_residue 0.3243 0.4840 0.2541 0.2493 0.404 0.481 0.695 34.31 0.556 0.474 509 522 -0.248276 0.107280
+GLK1_________1503_residue 0.2775 0.3650 0.2402 0.3792 0.158 0.208 0.521 51.32 0.581 0.497 484 500 -0.220000 0.068000
+YCD8_________1587_residue 0.4497 0.2081 0.3171 0.2428 0.121 0.069 0.444 50.90 0.353 0.395 502 528 0.498864 0.134470
+SRO9_________1401_residue 0.3800 0.3686 0.3616 0.1864 0.264 0.338 0.607 49.05 0.424 0.448 453 466 -0.957725 0.064378
+YCD6_________1701_residue 0.2897 0.3310 0.3116 0.3634 0.095 -0.007 0.396 57.17 0.528 0.474 540 566 -0.488693 0.093640
+YCD5__________333_residue 0.3494 0.3253 0.3333 0.3200 0.194 0.272 0.574 53.33 0.472 0.421 108 110 -0.248182 0.054545
+YCD3__________507_residue 0.2868 0.3309 0.2719 0.4000 0.120 0.136 0.472 59.78 0.560 0.490 159 168 -0.648214 0.095238
+STE50________1041_residue 0.2815 0.2815 0.3992 0.3362 0.120 0.095 0.464 55.52 0.467 0.438 332 346 -0.663295 0.046243
+HIS4_________2400_residue 0.4692 0.2500 0.3238 0.2304 0.268 0.314 0.591 45.96 0.370 0.425 782 799 -0.192866 0.065081
+BIK1_________1323_residue 0.2895 0.3322 0.4576 0.2733 0.133 0.088 0.464 55.42 0.438 0.438 425 440 -1.036591 0.061364
+FUS1_________1539_residue 0.3643 0.2298 0.3953 0.2812 0.101 -0.012 0.406 57.18 0.390 0.411 495 512 -0.612500 0.078125
+YC08__________579_residue 0.4161 0.2919 0.3241 0.1940 0.266 0.369 0.626 44.09 0.390 0.444 187 192 -0.194792 0.083333
+AGP1_________1902_residue 0.4408 0.2805 0.2830 0.2201 0.256 0.336 0.600 44.83 0.396 0.438 603 633 0.249289 0.121643
+LEU2_________1095_residue 0.4369 0.2901 0.3061 0.1859 0.432 0.480 0.683 34.70 0.382 0.441 353 364 -0.028571 0.049451
+NFS1_________1494_residue 0.4150 0.2476 0.3648 0.1960 0.226 0.296 0.572 44.53 0.356 0.431 481 497 -0.243461 0.072435
+BUD3_________4104_residue 0.3851 0.2439 0.4436 0.2391 0.137 0.019 0.430 51.99 0.358 0.380 1348 1367 -0.572860 0.076811
+GBP2_________1284_residue 0.4689 0.2373 0.3805 0.1893 0.168 0.125 0.466 50.03 0.329 0.426 416 427 -0.961124 0.096019
+ILV6__________930_residue 0.2305 0.4336 0.3024 0.2414 0.245 0.368 0.611 44.07 0.555 0.501 301 309 -0.184790 0.038835
+CWH36_________393_residue 0.3434 0.2525 0.5158 0.2000 0.130 0.116 0.468 49.71 0.341 0.382 126 130 -0.813846 0.069231
+PEL1_________1251_residue 0.3666 0.2669 0.3994 0.2343 0.124 0.038 0.419 55.50 0.387 0.390 408 416 -0.264663 0.115385
+RER1__________567_residue 0.4082 0.2449 0.4331 0.2035 0.237 0.187 0.511 44.36 0.339 0.346 174 188 0.055319 0.154255
+CDC10_________969_residue 0.5233 0.1783 0.3592 0.2186 0.169 0.104 0.478 47.07 0.294 0.371 316 322 -0.380435 0.086957
+MRPL32________552_residue 0.4538 0.1769 0.4745 0.2362 0.187 0.122 0.480 51.92 0.299 0.375 177 183 -0.920219 0.071038
+YCP4__________744_residue 0.4531 0.2604 0.3300 0.1885 0.199 0.253 0.546 49.55 0.358 0.451 240 247 -0.228340 0.085020
+CIT2_________1383_residue 0.4578 0.1962 0.4327 0.1677 0.185 0.146 0.480 44.29 0.285 0.398 446 460 -0.333044 0.095652
+YCP7__________720_residue 0.4197 0.2176 0.4451 0.2365 0.134 0.001 0.424 50.14 0.333 0.363 231 239 -0.101674 0.142259
+SAT4_________1812_residue 0.4864 0.1900 0.4009 0.1889 0.146 0.049 0.433 47.91 0.291 0.392 580 603 -0.593035 0.069652
+RVS161________798_residue 0.4221 0.2362 0.4505 0.2097 0.210 0.178 0.513 46.57 0.330 0.384 261 265 -0.551698 0.086792
+YCQ0__________852_residue 0.4779 0.2008 0.2921 0.2394 0.212 0.286 0.560 46.41 0.348 0.422 273 283 0.351237 0.148410
+ADP1_________3150_residue 0.4339 0.2073 0.3904 0.2534 0.139 0.044 0.435 51.18 0.343 0.380 1027 1049 -0.000477 0.102002
+PGK1_________1251_residue 0.4018 0.3776 0.2179 0.2532 0.804 0.835 0.900 26.81 0.498 0.462 410 416 -0.115865 0.067308
+POL4_________1749_residue 0.4543 0.2108 0.4487 0.2447 0.151 0.032 0.438 52.20 0.323 0.358 564 582 -0.575945 0.104811
+YCQ7_________2862_residue 0.4677 0.2152 0.3462 0.2020 0.156 0.122 0.476 49.43 0.326 0.396 905 953 0.250787 0.149003
+SRD1__________678_residue 0.4545 0.2121 0.4571 0.1863 0.133 0.045 0.441 48.99 0.295 0.370 220 225 -1.112445 0.057778
+MAK32________1092_residue 0.3787 0.2558 0.4016 0.2613 0.110 -0.026 0.399 58.83 0.385 0.407 351 363 -0.271901 0.104683
+PET18_________648_residue 0.4182 0.2606 0.4267 0.2214 0.207 0.128 0.488 52.78 0.357 0.389 207 215 -0.335349 0.144186
+MAK31_________267_residue 0.5143 0.1857 0.2754 0.2097 0.140 0.064 0.432 45.63 0.321 0.394 81 88 0.404545 0.011364
+HSP30_________999_residue 0.4357 0.2607 0.3151 0.2130 0.230 0.255 0.551 47.53 0.377 0.429 316 332 0.355422 0.138554
+YCR3_________1836_residue 0.4708 0.1809 0.4000 0.1938 0.133 0.032 0.426 47.43 0.288 0.380 584 611 0.058756 0.124386
+SYN_________1479_residues 0.3990 0.2150 0.4862 0.1903 0.144 0.062 0.445 50.04 0.307 0.373 476 492 -0.388008 0.117886
+YCR6_________2232_residue 0.4080 0.2441 0.4096 0.2629 0.120 -0.012 0.416 52.79 0.370 0.405 711 743 -0.546837 0.114401
+GNS1_________630_residues 0.4518 0.2108 0.3774 0.2238 0.092 -0.040 0.386 58.78 0.332 0.399 202 209 -0.177034 0.066986
+FEN2_________1539_residue 0.4498 0.2081 0.3624 0.2257 0.128 0.060 0.439 51.34 0.334 0.401 476 512 0.178125 0.134766
+RIM1__________444_residue 0.5315 0.2793 0.3271 0.1782 0.292 0.322 0.601 36.11 0.343 0.395 143 147 -0.797279 0.102041
+CRY1__________414_residue 0.5424 0.2458 0.2522 0.1101 0.758 0.762 0.851 28.50 0.306 0.460 134 137 -0.378102 0.036496
+YCS2________6504_residues 0.4228 0.2271 0.4398 0.2388 0.123 -0.021 0.411 53.12 0.336 0.350 2095 2167 -0.153946 0.118136
+YCS3________3681_residues 0.4461 0.1864 0.4392 0.2107 0.120 -0.001 0.403 52.35 0.302 0.394 1201 1226 -1.003589 0.065253
+GNS1_________1044_residue 0.4545 0.2458 0.3320 0.1947 0.239 0.243 0.549 44.21 0.349 0.383 335 347 0.358501 0.158501
+RBK1_________1002_residue 0.4885 0.2038 0.3490 0.2402 0.162 0.182 0.522 48.54 0.333 0.396 324 333 -0.243243 0.078078
+PHO87________2772_residue 0.4089 0.2389 0.4020 0.2010 0.168 0.161 0.502 47.61 0.341 0.386 886 923 0.088407 0.095341
+BUD5_________1617_residue 0.3310 0.3125 0.4526 0.2023 0.128 0.083 0.458 54.42 0.394 0.398 520 538 -0.284387 0.104089
+MATALPHA2_________633_res 0.3636 0.2273 0.4939 0.2177 0.109 -0.034 0.397 58.73 0.328 0.351 204 210 -0.667143 0.052381
+MATALPHA1_________528_res 0.4361 0.2180 0.4228 0.2589 0.112 -0.008 0.411 58.29 0.345 0.379 168 175 -0.522857 0.148571
+TSM1_________4224_residue 0.4768 0.1868 0.3871 0.2733 0.141 0.024 0.439 50.46 0.331 0.368 1359 1407 -0.418621 0.085288
+YCT5________1476_residues 0.2839 0.3920 0.2748 0.3458 0.119 0.090 0.463 59.87 0.560 0.479 477 491 -0.326273 0.107943
+PETCR46_______510_residue 0.1241 0.4526 0.2391 0.4160 0.114 0.174 0.494 45.17 0.695 0.525 164 169 -0.362130 0.065089
+YCT7________828_residues_ 0.2429 0.3143 0.1722 0.5685 0.075 0.022 0.400 48.69 0.672 0.527 265 275 -0.543273 0.076364
+YCT9_________447_residues 0.1940 0.4627 0.2566 0.2385 0.124 0.223 0.503 45.50 0.615 0.547 143 148 -0.056757 0.074324
+ARE1_________1833_residue 0.1992 0.4419 0.1851 0.4845 0.114 0.089 0.466 48.53 0.698 0.499 573 610 0.035738 0.167213
+RSC6_________1452_residue 0.3057 0.3472 0.3258 0.3163 0.130 0.095 0.460 53.35 0.506 0.459 472 483 -0.744306 0.062112
+THR4_________1545_residue 0.3951 0.3679 0.3653 0.1514 0.404 0.457 0.682 37.49 0.402 0.405 503 514 -0.280350 0.101167
+CTR86________1692_residue 0.4318 0.2416 0.4103 0.2536 0.155 0.050 0.449 54.03 0.356 0.361 548 563 -0.197513 0.101243
+PWP2_________2772_residue 0.3965 0.2777 0.3778 0.2351 0.162 0.112 0.478 51.97 0.391 0.414 896 923 -0.359263 0.104009
+YCU9_________777_residues 0.2772 0.4703 0.2151 0.3121 0.147 0.152 0.502 55.93 0.608 0.532 245 258 -0.285271 0.089147
+YCV1________1752_residues 0.3184 0.3965 0.2365 0.2487 0.120 0.128 0.472 56.89 0.533 0.499 555 583 0.168782 0.113208
+G10_________474_residues_ 0.2301 0.4690 0.3419 0.3084 0.130 0.109 0.474 52.83 0.566 0.482 152 157 -0.852229 0.082803
+HCM1_________1599_residue 0.3756 0.2703 0.3776 0.2755 0.137 0.112 0.467 57.13 0.411 0.423 518 532 -0.733271 0.078947
+RAD18________1464_residue 0.4270 0.2162 0.4734 0.1791 0.146 0.063 0.441 50.13 0.300 0.379 467 487 -0.755852 0.059548
+CYPR_________957_residues 0.3837 0.3527 0.3000 0.2406 0.181 0.109 0.477 53.88 0.458 0.435 310 318 -0.294025 0.103774
+YCW1________366_residues_ 0.3579 0.2316 0.3736 0.3418 0.104 0.049 0.444 54.99 0.419 0.419 117 121 -0.423967 0.090909
+YCW2________1548_residues 0.4115 0.2488 0.3639 0.2096 0.169 0.158 0.497 48.00 0.364 0.434 489 515 -0.424854 0.075728
+SSK22________3945_residue 0.3837 0.2287 0.4193 0.2719 0.128 0.064 0.444 54.11 0.373 0.393 1249 1314 -0.328234 0.099696
+SOL2__________948_residue 0.3295 0.3527 0.2647 0.3059 0.116 0.101 0.461 58.91 0.516 0.495 306 315 -0.251428 0.076190
+ERS1__________783_residue 0.2857 0.2673 0.3430 0.3882 0.088 -0.033 0.382 53.44 0.492 0.433 238 260 0.336154 0.157692
+PAT1_______2394_residues_ 0.3964 0.2799 0.3279 0.2619 0.180 0.215 0.525 50.08 0.417 0.435 769 797 -0.491719 0.069009
+SRB8_________4284_residue 0.3950 0.2687 0.4088 0.2845 0.155 0.042 0.458 54.92 0.390 0.352 1372 1427 -0.166924 0.113525
+YCX3_________384_residues 0.3608 0.2887 0.3441 0.3012 0.157 0.087 0.467 53.80 0.442 0.417 120 127 -0.078740 0.086614
+TUP1_________2142_residue 0.3940 0.2827 0.3262 0.2524 0.181 0.195 0.521 51.80 0.421 0.456 699 713 -0.627910 0.054698
+YC16________462_residues_ 0.3229 0.3438 0.1913 0.5963 0.131 0.024 0.464 45.93 0.649 0.455 151 153 -1.171895 0.078431
+ABP1_________1779_residue 0.4212 0.2988 0.3625 0.2336 0.238 0.259 0.558 47.62 0.397 0.459 579 592 -1.042061 0.060811
+KIN82________2181_residue 0.3646 0.2778 0.4411 0.2149 0.128 0.035 0.428 55.47 0.374 0.399 706 726 -0.641873 0.079890
+MSH3_________3144_residue 0.4221 0.1959 0.4288 0.2576 0.117 -0.035 0.394 52.47 0.333 0.366 1017 1047 -0.309647 0.079274
+CDC39________6327_residue 0.4196 0.2110 0.4409 0.2068 0.169 0.131 0.487 49.25 0.316 0.365 2045 2108 -0.091461 0.086338
+YCY4________1176_residues 0.4643 0.1883 0.4209 0.2449 0.130 -0.003 0.421 51.25 0.312 0.368 378 391 -0.421483 0.117647
+A2____________360_residue 0.3448 0.2299 0.5000 0.2317 0.099 -0.043 0.386 56.36 0.342 0.373 114 119 -0.906723 0.067227
+GIT1_________1557_residue 0.5012 0.2028 0.3429 0.1742 0.193 0.195 0.530 44.36 0.295 0.383 491 518 0.267568 0.123552
+YCZ0_________951_residues 0.4704 0.2000 0.4000 0.1939 0.120 -0.014 0.408 51.30 0.298 0.383 309 316 -0.312658 0.110759
+YCZ1________549_residues_ 0.4430 0.2081 0.4646 0.1636 0.105 -0.066 0.379 51.38 0.282 0.372 174 182 -0.128571 0.137363
+YCZ2________1107_residues 0.3974 0.3079 0.3206 0.2278 0.200 0.235 0.547 48.47 0.418 0.443 364 368 -0.124728 0.078804
+YCZ3________336_residues_ 0.3830 0.2553 0.3108 0.3333 0.084 -0.092 0.330 61.00 0.443 0.423 106 111 0.181982 0.135135
+PAU3__________375_residue 0.4128 0.3670 0.2524 0.1075 0.618 0.687 0.810 31.93 0.413 0.478 121 124 0.317742 0.088710
+YCZ5________1086_residues 0.4460 0.2822 0.3235 0.2058 0.187 0.192 0.522 53.17 0.378 0.437 347 361 -0.055956 0.074792
+YCZ6_______2499_residues_ 0.4534 0.2112 0.4206 0.2218 0.123 -0.005 0.403 53.63 0.321 0.369 797 832 -0.156491 0.099760
+YCZ7_______1092_residues_ 0.4877 0.2456 0.3271 0.2152 0.147 0.072 0.452 55.49 0.349 0.410 347 363 -0.352893 0.107438
diff --git a/inst/sequences/seqAA.fasta b/inst/sequences/seqAA.fasta
new file mode 100644
index 0000000..f51d8a2
--- /dev/null
+++ b/inst/sequences/seqAA.fasta
@@ -0,0 +1,5 @@
+>A06852 183 residues
+MPRLFSYLLGVWLLLSQLPREIPGQSTNDFIKACGRELVRLWVEICGSVSWGRTALSLEE
+PQLETGPPAETMPSSITKDAEILKMMLEFVPNLPQELKATLSERQPSLRELQQSASKDSN
+LNFEEFKKIILNRQNEAEDKSLLELKNLGLDKHSRKKRLFRMTLSEKCCQVGCIRKDIAR
+LC*
diff --git a/inst/sequences/smallAA.fasta b/inst/sequences/smallAA.fasta
new file mode 100644
index 0000000..4432191
--- /dev/null
+++ b/inst/sequences/smallAA.fasta
@@ -0,0 +1,2 @@
+>smallAA A very small AA file in FASTA format
+SEQINRSEQINRSEQINRSEQINR*
diff --git a/inst/sequences/smallAA.fasta.gz b/inst/sequences/smallAA.fasta.gz
new file mode 100644
index 0000000..a53886a
Binary files /dev/null and b/inst/sequences/smallAA.fasta.gz differ
diff --git a/inst/sequences/someORF.fsa b/inst/sequences/someORF.fsa
new file mode 100644
index 0000000..696ca0f
--- /dev/null
+++ b/inst/sequences/someORF.fsa
@@ -0,0 +1,453 @@
+;
+; This FASTA file example was taken from package Biostrings_2.6.4 on 25-NOV-2007
+;
+>YAL001C TFC3 SGDID:S0000001, Chr I from 152168-146596, reverse complement, Verified ORF
+ACTTGTAAATATATCTTTTATTTTCCGAGAGGAAAAAGTTTCAAAAAAAAAAAAAAAAAA
+AGAAGAAAAATAACTTTCTCATGTAATAAAAGGTAACTAATGTAGACAAAAAAGTATACA
+TTTAGCTTTTCTTTTTTTGATGATTTTTGAGTTTCATGTTACTAATCAGAACAATTAACG
+ACACCTTCATTATGAAAAAATTAATTAGCTATAAGTCTTCGAAGTAGAACATGATATTTG
+GCAATCACTCGAATAACTATCTTAATTTACCTGCTGAAATAATTTGAAAAAACACCCGAG
+GCAGCAGACGAAAGGTGTTTTTGCTAAACAATGATTGATTTCTGGCGCCATTTCTACATT
+CTGAACAGTTCATCTCATTTCAGTAACAGTACTTCAATGGAATATTTATTAAAGAAAGTG
+CTTAAAAAAGTATTATAAAACGATACATGGACTGACTCAAGATTGAGCTAATAAGGTCCA
+CCGCCTAGTGCTTAAGAGTTCTGTACCACTATAATAATTTATCTTGATCGTATTATGTGT
+AAAAAAAAGGCGCTTGAAATGAAAGCTCCGAAAATTAAAATACTTTGACTGCTTCGGAAA
+ACAAAAACATATAAATAAATTTAAAAAATAAACTGTAAAATATTTAAAAACTATTAAAAA
+TATTTTATATTTTTAAAATTATTTATTATTATGTCATGTGACAAGACTTAAATCATTACA
+TAAAAGGTTTTGAAGTTCAATGTCAAAGTCAATATAATAAGCATACTAAGGCACACTTAT
+GCAAATCGAGTTATTGAAGCTGGTAAAATTATAAGATTTTTATTTTTATTTCTTTTATTT
+CTGCAAATCTGCATTTTCAAATACCGCTTGGTTTTTTGCATCATAAAGGGCGGCGCTTTC
+AGTCGCGAAAGTGAAATAAACAACCAGTCACACATATAACTTTCTTCTTGCCATAAGAGA
+GAAGAGGACGTTTGGTTGAAGCCAACTAGCCACAAGAAAAATGGTACTGACGATTTATCC
+TGACGAACTCGTACAAATAGTGTCTGATAAAATTGCTTCAAATAAGGGAAGTATGTTCAT
+GTCTCATTCTCCTTTTCGGCTCCGTTTAGGTGATAAACGTACTATATTGTGAAAGATTAT
+TTACTAACGACACATTGAAGAAATCACTTTGAATCAGCTGTGGGATATATCTGGTAAATA
+TTTTGATTTGTCTGATAAAAAAGTTAAACAGTTCGTGCTTTCATGCGTGATATTGAAAAA
+GGACATTGAGGTGTATTGTGATGGTGCTATAACAACTAAAAATGTGACTGATATTATAGG
+CGACGCTAATCATTCATACTCGGTTGGGATTACTGAGGACAGCCTATGGACATTATTAAC
+GGGATACACAAAAAAGGAGTCAACTATTGGAAATTCTGCATTTGAACTACTTCTCGAAGT
+TGCCAAATCAGGAGAAAAAGGGATCAATACTATGGATTTGGCGCAGGTAACTGGGCAAGA
+TCCTAGAAGTGTGACTGGACGTATCAAGAAAATAAACCACCTGTTAACAAGTTCACAACT
+GATTTATAAGGGACACGTCGTGAAGCAATTGAAGCTAAAAAAATTCAGCCATGACGGGGT
+GGATAGTAATCCCTATATTAATATTAGGGATCATTTAGCAACAATAGTTGAGGTGGTAAA
+ACGATCAAAAAATGGTATTCGCCAGATAATTGATTTAAAGCGTGAATTGAAATTTGACAA
+AGAGAAAAGACTTTCTAAAGCTTTTATTGCAGCTATTGCATGGTTAGATGAAAAGGAGTA
+CTTAAAGAAAGTGCTTGTAGTATCACCCAAGAATCCTGCCATTAAAATCAGATGTGTAAA
+ATACGTGAAAGATATTCCAGACTCTAAAGGCTCGCCTTCATTTGAGTATGATAGCAATAG
+CGCGGATGAAGATTCTGTATCAGATAGCAAGGCAGCTTTCGAAGATGAAGACTTAGTCGA
+AGGTTTAGATAATTTCAATGCGACTGATTTATTACAAAATCAAGGCCTTGTTATGGAAGA
+GAAAGAGGATGCTGTAAAGAATGAAGTTCTTCTTAATCGATTTTATCCACTTCAAAATCA
+GACTTATGACATTGCAGATAAGTCTGGCCTTAAAGGAATTTCAACTATGGATGTTGTAAA
+TCGAATTACCGGAAAAGAATTTCAGCGAGCTTTTACCAAATCAAGCGAATATTATTTAGA
+AAGTGTGGATAAGCAAAAAGAAAATACAGGGGGGTATAGGCTTTTTCGCATATACGATTT
+TGAGGGAAAGAAGAAGTTTTTTAGGCTGTTCACAGCTCAGAACTTTCAAAAGTTAACAAA
+TGCGGAAGACGAAATATCCGTTCCAAAAGGGTTTGATGAGCTAGGCAAATCTCGTACCGA
+TTTGAAAACTCTCAACGAGGATAATTTCGTCGCACTCAACAACACTGTTAGATTTACAAC
+GGACAGCGATGGACAGGATATATTCTTCTGGCACGGTGAATTAAAAATTCCCCCAAACTC
+AAAAAAAACTCCGAATAAAAACAAACGGAAGAGGCAGGTTAAAAACAGTACTAATGCTTC
+TGTTGCAGGAAACATTTCGAATCCCAAAAGGATTAAGCTAGAGCAGCATGTCAGCACTGC
+ACAGGAGCCGAAATCTGCTGAAGATAGTCCAAGTTCAAACGGAGGCACTGTTGTCAAAGG
+CAAGGTGGTTAACTTCGGCGGCTTTTCTGCCCGCTCTTTGCGTTCACTACAGAGACAGAG
+AGCCATTTTGAAAGTTATGAATACGATTGGTGGGGTAGCATACCTGAGAGAACAATTTTA
+CGAAAGCGTTTCTAAATATATGGGCTCCACAACGACATTAGATAAAAAGACTGTCCGTGG
+TGATGTTGATTTGATGGTAGAAAGCGAAAAATTAGGAGCCAGAACAGAGCCTGTATCAGG
+AAGAAAAATTATTTTTTTGCCCACTGTTGGAGAGGACGCTATCCAAAGGTACATCCTGAA
+AGAAAAAGATAGTAAAAAAGCAACCTTTACTGATGTTATACATGATACGGAAATATACTT
+CTTTGACCAAACGGAAAAAAATAGGTTTCACAGAGGAAAGAAATCAGTTGAAAGAATTCG
+TAAGTTTCAGAACCGCCAAAAGAATGCTAAGATCAAAGCTTCAGATGACGCTATCTCTAA
+GAAGAGTACGTCGGTCAACGTATCAGATGGAAAGATCAAAAGGAGAGACAAAAAAGTGTC
+TGCTGGTAGGACAACGGTGGTCGTGGAAAATACTAAAGAAGACAAAACTGTCTATCATGC
+AGGCACTAAAGATGGTGTTCAGGCTTTAATCAGAGCTGTTGTAGTTACTAAAAGTATTAA
+AAATGAAATAATGTGGGACAAAATAACAAAATTATTTCCTAATAATTCTTTAGATAACCT
+AAAAAAGAAATGGACGGCACGGCGAGTAAGAATGGGTCATAGTGGTTGGAGGGCATATGT
+CGATAAGTGGAAAAAAATGCTCGTTCTAGCCATTAAAAGTGAAAAGATTTCACTGAGGGA
+TGTTGAAGAACTAGATCTTATCAAATTGCTTGATATTTGGACCTCTTTTGATGAAAAGGA
+AATAAAAAGGCCGCTCTTTCTTTATAAGAACTACGAAGAGAATAGAAAAAAATTTACTCT
+GGTACGTGATGACACACTTACACATTCTGGCAACGATCTGGCCATGTCTTCTATGATTCA
+AAGAGAGATCTCTTCTTTAAAAAAAACTTACACTAGAAAGATTTCCGCTTCTACTAAGGA
+CTTATCGAAGAGTCAAAGCGACGATTATATTCGCACAGTGATCCGGTCCATATTAATAGA
+AAGTCCTTCGACCACTAGAAATGAAATAGAGGCGTTGAAGAACGTTGGAAACGAATCAAT
+AGATAACGTCATCATGGATATGGCTAAGGAAAAGCAAATTTATCTCCATGGCTCAAAACT
+TGAATGTACTGATACTTTACCAGACATTTTGGAAAATAGAGGAAATTATAAAGATTTTGG
+TGTAGCTTTTCAGTATAGATGTAAGGTTAATGAATTATTGGAGGCCGGAAACGCTATTGT
+TATCAATCAAGAGCCGTCCGATATATCCTCTTGGGTTTTAATTGATTTGATTTCGGGAGA
+GCTATTGAATATGGATGTAATTCCAATGGTGAGAAATGTTCGACCTTTAACGTATACTTC
+AAGGAGATTTGAAATACGAACATTAACTCCCCCTCTGATTATATATGCCAATTCTCAGAC
+AAAATTGAATACAGCAAGGAAGTCTGCTGTCAAAGTTCCACTGGGCAAACCATTTTCTCG
+TTTATGGGTGAATGGATCTGGTTCCATTAGGCCAAACATATGGAAGCAGGTAGTTACTAT
+GGTCGTTAACGAAATAATATTTCATCCAGGGATAACATTGAGTAGATTGCAATCTAGGTG
+TCGTGAAGTACTTTCGCTTCATGAAATATCAGAAATATGCAAATGGCTCCTAGAAAGACA
+AGTATTAATAACTACTGATTTTGATGGCTATTGGGTCAATCATAATTGGTATTCTATATA
+TGAATCTACATAATGAAATGAGGTGTATAAATTTTACTTTTATGTAACCAAAGTTGTATT
+AAATATTTAGAAATGTTATACTATTTTTGGGTTAGATTCCGTCTGGCAAATTAAACAAGA
+ATATTCATCGGGTTTCTGGGCCAAGTTTTCGAGGCAAGTCTGGTGAAAGCCATGGTGACA
+TTTGAATATGACAAGGGGAGTTTTGAGATCTACACTAATCATATCTTGGTGGCGCTGTAC
+ATTTTCCCAAGCTAGAAAAAGTAATGGGTCCAGACCAGCTCCCCATATTTTTTTCCCGCA
+GATTTCGCAGTCGTCGGTGTGTATAGACCACCCTTCACTCAGAAATTTTCTATACTGTTG
+AACAAGATCTTGCGAAGAATCCTCTATAATCTTTTGAATCAACTCAGAAAGAGATCTTTC
+CAATTTATAAGTATTAAAAACATTCAGTAGCAGTTGCTTTAAATCCTGAACTTTCATTAA
+AATAACGTCTTGGTGCTCAAGAACAGAAGACATTATTTCCCAGAATTCCCCACCTGAATC
+CTTCTGAGAGGAACTCTTGGACCTAACCAATCCCAAAAATGCTTCTTGAAGTAGTTTATT
+ACATAAATCTTTCCTTTCATCGTGTGAAGGATATTTCCCATATAGAGTGATCAGGAATGT
+AATCAATAATATCCAACACGATTTTTTCTCGGTACCAGCAGAATTACAATCATCAAAGGC
+CCTCCTCAAAGATTCCAATAAAGTAGATATATTTGTTTTCCCTTCATTTAGACATTGTTC
+AATAAAACTTAATAAATCCCTGACTGCTAGGTTAAAACTTTCCAAGCGTTCGTGAATTAT
+AGCTGCAGCTTCAAAATTAGAGTCTTGATTCAATAAGTCCAGAATTTGTAAGCTCTCAGC
+GTTGCTGAGAACTGTACCATTAAGCCAAAGAATCATTTCCCTTTTGCTCTTGTATTTACA
+GTTCAATTCGATGTGTAAATTTCTTAAACGCTTATCGACCTTATTGTTGATAT
+>YAL002W VPS8 SGDID:S0000002, Chr I from 142709-148533, Verified ORF
+TTCCAAGGCCGATGAATTCGACTCTTTCCCAGCTGCCTCTGCTGCCGCTGCCGAAGAAGA
+AGAAGATGACGATGTCGATTTATTCGGTTCCGACGATGAAGAAGCTGACGCTGAAGCTGA
+AAAGTTGAAGGCTGAAAGAATTGCCGCATACAACGCTAAGAAGGCTGCTAAGCCAGCTAA
+GCCAGCTGCTAAGTCCATTGTCACTCTAGATGTCAAGCCATGGGATGATGAAACCAATTT
+GGAAGAAATGGTTGCTAACGTCAAGGCCATCGAAATGGAAGGTTTGACCTGGGGTGCTCA
+CCAATTTATCCCAATTGGTTTCGGTATCAAGAAGTTGCAAATTAACTGTGTTGTCGAAGA
+TGACAAGGTTTCCTTGGATGACTTGCAACAAAGCATTGAAGAAGACGAAGACCACGTCCA
+ATCTACCGATATTGCTGCTATGCAAAAATTATAAAAGGCTTTTTTATAAACTTTTTATAA
+TTAACATTAAAGCAAAAACAACATTGTAAAGATTAACAAATAAATGAAAAAAACAACGAA
+ATAACTTAGGTTTTAGGCTAAAAAAAACAGAAGGAATTTTGAACGATAAACTTTTCGACT
+GCACACGAAACATTATTACTAATTTGTGTAACCACTATATAAGGAATCGTGTTTATTAAT
+TGAATTTATTCCGGGAATATTCAAGTTATGTATATCTCTTTTCATATTCTTAAATACACA
+TACTCATAATATCTTGTCGAAAATACGCGGTGTAGGGAGTTATGGTGGATAACTTTTTCA
+CGATTAGAAGAAAAGGAAAATTTCATTATTCGTAGCTTAACATGGCAAAAACGAGAAAGA
+CATATAATCAAAACGTGAGTTTCCTGTGGAAAAAAAAAAAAGGGAACCTCTGGTTACGAT
+GATATACCTGCGTGAAAAAGGACAGTTATTACCAATACATACAAAGGCTTAATAAGTGTA
+AAATATATATCTGCCGAGACCATTACTCATTACACCTAGAATGGAGCAAAATGGCCTTGA
+CCACGACAGCAGATCTAGCATCGATACGACTATTAATGACACTCAAAAGACTTTCCTAGA
+ATTTAGATCGTATACCCAATTAAGTGAAAAACTGGCATCTAGTTCTTCATATACGGCACC
+TCCCCTGAACGAAGATGGTCCTAAAGGGGTAGCTTCTGCAGTGTCACAAGGCTCCGAATC
+CGTAGTCTCATGGACAACTTTAACACACGTATATTCCATCCTGGGTGCTTATGGAGGGCC
+CACGTGCTTGTATCCGACAGCCACGTATTTTTTGATGGGCACTTCTAAAGGATGCGTACT
+CATTTTTAATTATAATGAACATTTGCAGACAATCCTAGTGCCGACCTTATCTGAGGACCC
+TTCTATTCACTCAATAAGAAGTCCAGTGAAATCAATTGTCATATGTTCCGATGGTACTCA
+TGTAGCTGCCTCATACGAGACCGGAAATATATGCATTTGGAACTTGAACGTAGGGTATAG
+AGTGAAACCCACTTCTGAACCAACAAATGGTATGACCCCAACGCCTGCCTTACCGGCAGT
+CTTACACATCGATGACCATGTGAACAAGGAAATCACAGGGTTAGACTTTTTTGGTGCTCG
+GCATACAGCCCTGATTGTTAGTGATAGGACAGGTAAAGTATCACTCTATAACGGTTACAG
+AAGAGGCTTTTGGCAGTTGGTGTATAATTCAAAAAAAATTTTAGATGTGAACTCTTCCAA
+GGAGAAATTAATAAGGTCAAAGTTGTCTCCACTAATATCACGGGAGAAAATTTCCACTAA
+TTTGTTGAGTGTACTCACAACTACACATTTTGCCCTTATTTTATTATCGCCACACGTTTC
+TTTGATGTTTCAAGAAACTGTTGAACCCTCAGTACAAAATTCTCTAGTCGTGAATAGCTC
+TATTTCATGGACTCAAAACTGTTCCAGGGTTGCTTATTCCGTAAATAATAAAATTTCTGT
+TATTTCCATATCTTCATCAGACTTCAATGTTCAGTCCGCTAGCCATTCTCCTGAATTTGC
+AGAATCTATATTATCCATTCAATGGATTGACCAGCTCCTACTTGGTGTTTTAACCATATC
+GCACCAATTTTTGGTATTGCACCCCCAACATGACTTCAAGATCCTGTTAAGATTGGATTT
+TCTGATTCACGATTTGATGATCCCACCTAATAAATATTTTGTAATAAGTAGAAGAAGTTT
+CTACCTGTTAACAAACTACTCATTTAAAATTGGCAAATTTGTGTCTTGGTCAGATATTAC
+TTTAAGACATATTTTGAAAGGCGACTACTTGGGTGCATTGGAGTTCATAGAATCACTTTT
+GCAACCTTACTGTCCACTGGCAAACTTGTTGAAGCTAGATAATAATACGGAAGAGAGGAC
+TAAGCAACTTATGGAACCATTTTACAATCTGTCCTTGGCTGCCCTAAGGTTTCTTATAAA
+AAAAGATAATGCCGACTACAATAGGGTTTACCAATTATTAATGGTAGTTGTTCGTGTTTT
+GCAGCAATCTTCCAAAAAACTAGACTCAATTCCTTCTCTAGACGTCTTTTTGGAACAGGG
+TCTGGAGTTCTTTGAATTGAAGGACAACGCGGTATATTTTGAAGTTGTAGCAAATATTGT
+TGCCCAAGGATCAGTTACGTCAATTTCCCCAGTTCTTTTCAGGTCCATAATTGATTACTA
+TGCTAAGGAGGAGAATTTAAAAGTAATTGAAGACTTAATCATCATGTTAAATCCTACTAC
+GCTTGATGTTGATCTTGCCGTCAAACTATGCCAAAAGTATAATTTGTTCGATTTATTAAT
+ATATATTTGGAACAAGATCTTTGATGATTATCAAACCCCAGTGGTGGACTTGATATACAG
+GATTTCTAACCAAAGTGAAAAATGTGTGATCTTCAATGGTCCTCAAGTACCTCCTGAAAC
+GACTATATTTGATTACGTAACGTATATCCTTACTGGCAGGCAATATCCACAAAACTTGTC
+TATATCACCAAGTGATAAATGCTCCAAAATACAAAGGGAACTTTCAGCATTTATTTTTAG
+TGGCTTCTCCATAAAATGGCCGTCGAACAGCAATCATAAACTTTACATATGCGAAAATCC
+AGAAGAAGAGCCAGCATTTCCTTACTTTCACCTTTTATTGAAATCGAATCCGAGTAGGTT
+CTTAGCAATGCTCAATGAAGTGTTTGAAGCGTCCTTGTTTAACGATGACAATGACATGGT
+TGCATCAGTTGGAGAAGCAGAATTGGTAAGTAGGCAATATGTTATTGATCTACTATTGGA
+TGCTATGAAAGATACGGGAAATTCAGACAACATCAGGGTACTTGTTGCAATTTTCATTGC
+AACTAGTATATCAAAATATCCTCAATTTATTAAAGTGTCTAACCAAGCCCTCGACTGCGT
+TGTTAATACCATATGCTCCTCTAGGGTTCAAGGTATATATGAAATTTCTCAAATAGCTCT
+GGAGTCGCTTTTACCCTATTATCATTCAAGAACAACAGAAAATTTTATACTGGAACTAAA
+AGAAAAAAATTTCAATAAAGTTCTTTTCCATATCTATAAAAGTGAAAATAAGTACGCCAG
+TGCGCTTTCACTTATTTTAGAAACTAAGGACATCGAAAAAGAATATAACACGGACATTGT
+ATCCATAACCGACTACATACTCAAAAAATGCCCACCTGGAAGTTTAGAATGTGGCAAAGT
+TACTGAAGTTATCGAGACGAACTTTGATCTTCTTCTCTCAAGGATCGGTATCGAAAAATG
+CGTCACAATTTTTTCTGACTTTGACTATAATCTTCATCAAGAAATCCTGGAAGTAAAGAA
+TGAGGAGACTCAGCAAAAGTATTTGGATAAGCTTTTTTCTACGCCAAATATCAACAATAA
+GGTCGATAAGCGTTTAAGAAATTTACACATCGAATTGAACTGTAAATACAAGAGCAAAAG
+GGAAATGATTCTTTGGCTTAATGGTACAGTTCTCAGCAACGCTGAGAGCTTACAAATTCT
+GGACTTATTGAATCAAGACTCTAATTTTGAAGCTGCAGCTATAATTCACGAACGCTTGGA
+AAGTTTTAACCTAGCAGTCAGGGATTTATTAAGTTTTATTGAACAATGTCTAAATGAAGG
+GAAAACAAATATATCTACTTTATTGGAATCTTTGAGGAGGGCCTTTGATGATTGTAATTC
+TGCTGGTACCGAGAAAAAATCGTGTTGGATATTATTGATTACATTCCTGATCACTCTATA
+TGGGAAATATCCTTCACACGATGAAAGGAAAGATTTATGTAATAAACTACTTCAAGAAGC
+ATTTTTGGGATTGGTTAGGTCCAAGAGTTCCTCTCAGAAGGATTCAGGTGGGGAATTCTG
+GGAAATAATGTCTTCTGTTCTTGAGCACCAAGACGTTATTTTAATGAAAGTTCAGGATTT
+AAAGCAACTGCTACTGAATGTTTTTAATACTTATAAATTGGAAAGATCTCTTTCTGAGTT
+GATTCAAAAGATTATAGAGGATTCTTCGCAAGATCTTGTTCAACAGTATAGAAAATTTCT
+GAGTGAAGGGTGGTCTATACACACCGACGACTGCGAAATCTGCGGGAAAAAAATATGGGG
+AGCTGGTCTGGACCCATTACTTTTTCTAGCTTGGGAAAATGTACAGCGCCACCAAGATAT
+GATTAGTGTAGATCTCAAAACTCCCCTTGTCATATTCAAATGTCACCATGGCTTTCACCA
+GACTTGCCTCGAAAACTTGGCCCAGAAACCCGATGAATATTCTTGTTTAATTTGCCAGAC
+GGAATCTAACCCAAAAATAGTATAACATTTCTAAATATTTAATACAACTTTGGTTACATA
+AAAGTAAAATTTATACACCTCATTTCATTATGTAGATTCATATATAGAATACCAATTATG
+ATTGACCCAATAGCCATCAAAATCAGTAGTTATTAATACTTGTCTTTCTAGGAGCCATTT
+GCATATTTCTGATATTTCATGAAGCGAAAGTACTTCACGACACCTAGATTGCAATCTACT
+CAATGTTATCCCTGGATGAAATATTATTTCGTTAACGACCATAGTAACTACCTGCTTCCA
+TATGTTTGGCCTAATGGAACCAGATCCATTCACCCATAAACGAGAAAATGGTTTGCCCAG
+TGGAACTTTGACAGCAGACTTCCTTGCTGTATTCAATTTTGTCTGAGAATTGGCATATAT
+AATCAGAGGGGGAGTTAATGTTCGTATTTCAAATCTCCTTGAAGTATACGTTAAAGGTCG
+AACATTTCTCACCATTGGAATTACATCCATATTCAATAGCTCTCCCGAAATCAAATCAAT
+TAAAACCCAAGAGGATATATCGGACGGCTCTTGATTGATAACAATAGCGTTTCCGGCCTC
+CAATAATTCATTAACCTTACATCTATACTGAAAAGCTACACCAAAATCTTTATAATTTCC
+TCTATTTTCCAAAATGTCTGGTAAAGTATCAGTACATTCAAGTTTTGAGCCATGGAGATA
+AATTTGCTTTTCCTTAGCCATATCCATGATGACGTTATCTATTGATTCGTTTCCAACGTT
+CTTCAACGCCTCTATTTCATTTCTAGTGGTCGAAGGACTTTCTATTAATATGGACCGGAT
+CACTGTGCGAATATAATCGTCGCTTTGACTCTTCGATAAGTCCTTAGTAGAAGCGGAAAT
+CTTTCTAGTGTAAGTTTTTTTTAAAGAAGAGATCTCTCTTTGAATCATAGAAGACATGGC
+CAGATCGTTGCCAGAATGTGTAAGTGTGTCATCACGTACCAGAGTAAATTTTTTTCTATT
+CTCTT
+>YAL003W EFB1 SGDID:S0000003, Chr I from 141176-144162, Verified ORF
+CTTCATGTCAGCCTGCACTTCTGGGTCGTTGAAGTTTCTACCGATCAAACGCTTAGCGTC
+GAAAACGGTATTCGAAGGATTCATAGCAGCTTGATTCTTAGCAGCATCACCAATCAATCT
+TTCAGTGTCAGTGAAAGCGACAAAAGATGGAGTGGTTCTGTTACCTTGATCGTTGGCAAT
+AATGTCCACACGATCATTAGCAAAGTGAGCAACACACGAGTATGTTGTACCTAAATCAAT
+ACCGACAGCTTTTGACATATTATCTGTTATTTACTTGAATTTTTGTTTCTTGTAATACTT
+GATTACTTTTCTTTTGATGTGCTTATCTTACAAATAGAGAAAATAAAACAACTTAAGTAA
+GAATTGGGAAACGAAACTACAACTCAATCCCTTCTCGAAGATACATCAATCCACCCCTTA
+TATAACCTTGAAGTCCTCGAAACGATCAGCTAATCTAAATGGCCCCCCTTCTTTTTGGGT
+TCTTTCTCTCCCTTTTGCCGCCGATGGAACGTTCTGGAAAAAGAAGAATAATTTAATTAC
+TTTCTCAACTAAAATCTGGAGAAAAAACGCAAATGACAGCTTCTAAACGTTCCGTGTGCT
+TTCTTTCTAGAATGTTCTGGAAAGTTTACAACAATCCACAAGAACGAAAATGCCGTTGAC
+AATGATGAAACCATCATCCACACACCGCGCACACGTGCTTTATTTCTTTTTCTGAATTTT
+TTTTTTCCGCCATTTTCAACCAAGGAAATTTTTTTTCTTAGGGCTCAGAACCTGCAGGTG
+AAGAAGCGCTTTAGAAATCAAAGCACAACGTAACAATTTGTCGACAACCGAGCCTTTGAA
+GAAAAAATTTTTCACATTGTCGCCTCTAAATAAATAGTTTAAGGTTATCTACCCACTATA
+TTTAGTTGGTTCTTTTTTTTTTCCTTCTACTCTTTATCTTTTTACCTCATGCTTTCTACC
+TTTCAGCACTGAAGAGTCCAACCGAATATATACACACATAATGGCATCCACCGATTTCTC
+CAAGATTGAAACTTTGAAACAATTAAACGCTTCTTTGGCTGACAAGTCATACATTGAAGG
+GTATGTTCCGATTTAGTTTACTTTATAGATCGTTGTTTTTCTTTCTTTTTTTTTTTTCCT
+ATGGTTACATGTAAAGGGAAGTTAACTAATAATGATTACTTTTTTTCGCTTATGTGAATG
+ATGAATTTAATTCTTTGGTCCGTGTTTATGATGGGAAGTAAGACCCCCGATATGAGTGAC
+AAAAGAGATGTGGTTGACTATCACAGTATCTGACGATAGCACAGAGCAGAGTATCATTAT
+TAGTTATCTGTTATTTTTTTTTCCTTTTTTGTTCAAAAAAAGAAAGACAGAGTCTAAAGA
+TTGCATTACAAGAAAAAAGTTCTCATTACTAACAAGCAAAATGTTTTGTTTCTCCTTTTA
+AAATAGTACTGCTGTTTCTCAAGCTGACGTCACTGTCTTCAAGGCTTTCCAATCTGCTTA
+CCCAGAATTCTCCAGATGGTTCAACCACATCGCTTCCAAGGCCGATGAATTCGACTCTTT
+CCCAGCTGCCTCTGCTGCCGCTGCCGAAGAAGAAGAAGATGACGATGTCGATTTATTCGG
+TTCCGACGATGAAGAAGCTGACGCTGAAGCTGAAAAGTTGAAGGCTGAAAGAATTGCCGC
+ATACAACGCTAAGAAGGCTGCTAAGCCAGCTAAGCCAGCTGCTAAGTCCATTGTCACTCT
+AGATGTCAAGCCATGGGATGATGAAACCAATTTGGAAGAAATGGTTGCTAACGTCAAGGC
+CATCGAAATGGAAGGTTTGACCTGGGGTGCTCACCAATTTATCCCAATTGGTTTCGGTAT
+CAAGAAGTTGCAAATTAACTGTGTTGTCGAAGATGACAAGGTTTCCTTGGATGACTTGCA
+ACAAAGCATTGAAGAAGACGAAGACCACGTCCAATCTACCGATATTGCTGCTATGCAAAA
+ATTATAAAAGGCTTTTTTATAAACTTTTTATAATTAACATTAAAGCAAAAACAACATTGT
+AAAGATTAACAAATAAATGAAAAAAACAACGAAATAACTTAGGTTTTAGGCTAAAAAAAA
+CAGAAGGAATTTTGAACGATAAACTTTTCGACTGCACACGAAACATTATTACTAATTTGT
+GTAACCACTATATAAGGAATCGTGTTTATTAATTGAATTTATTCCGGGAATATTCAAGTT
+ATGTATATCTCTTTTCATATTCTTAAATACACATACTCATAATATCTTGTCGAAAATACG
+CGGTGTAGGGAGTTATGGTGGATAACTTTTTCACGATTAGAAGAAAAGGAAAATTTCATT
+ATTCGTAGCTTAACATGGCAAAAACGAGAAAGACATATAATCAAAACGTGAGTTTCCTGT
+GGAAAAAAAAAAAAGGGAACCTCTGGTTACGATGATATACCTGCGTGAAAAAGGACAGTT
+ATTACCAATACATACAAAGGCTTAATAAGTGTAAAATATATATCTGCCGAGACCATTACT
+CATTACACCTAGAATGGAGCAAAATGGCCTTGACCACGACAGCAGATCTAGCATCGATAC
+GACTATTAATGACACTCAAAAGACTTTCCTAGAATTTAGATCGTATACCCAATTAAGTGA
+AAAACTGGCATCTAGTTCTTCATATACGGCACCTCCCCTGAACGAAGATGGTCCTAAAGG
+GGTAGCTTCTGCAGTGTCACAAGGCTCCGAATCCGTAGTCTCATGGACAACTTTAACACA
+CGTATATTCCATCCTGGGTGCTTATGGAGGGCCCACGTGCTTGTATCCGACAGCCACGTA
+TTTTTTGATGGGCACTTCTAAAGGATGCGTACTCATTTTTAATTATAATGAACATTTGCA
+GACAATCCTAGTGCCGACCTTATCTGAGGACCCTTCTATTCACTCAATAAGAAGTCCAGT
+GAAATCAATTGTCATATGTTCCGATGGTACTCATGTAGCTGCCTCAT
+>YAL005C SSA1 SGDID:S0000004, Chr I from 142433-138505, reverse complement, Verified ORF
+CACTCATATCGGGGGTCTTACTTCCCATCATAAACACGGACCAAAGAATTAAATTCATCA
+TTCACATAAGCGAAAAAAAGTAATCATTATTAGTTAACTTCCCTTTACATGTAACCATAG
+GAAAAAAAAAAAAGAAAGAAAAACAACGATCTATAAAGTAAACTAAATCGGAACATACCC
+TTCAATGTATGACTTGTCAGCCAAAGAAGCGTTTAATTGTTTCAAAGTTTCAATCTTGGA
+GAAATCGGTGGATGCCATTATGTGTGTATATATTCGGTTGGACTCTTCAGTGCTGAAAGG
+TAGAAAGCATGAGGTAAAAAGATAAAGAGTAGAAGGAAAAAAAAAAGAACCAACTAAATA
+TAGTGGGTAGATAACCTTAAACTATTTATTTAGAGGCGACAATGTGAAAAATTTTTTCTT
+CAAAGGCTCGGTTGTCGACAAATTGTTACGTTGTGCTTTGATTTCTAAAGCGCTTCTTCA
+CCTGCAGGTTCTGAGCCCTAAGAAAAAAAATTTCCTTGGTTGAAAATGGCGGAAAAAAAA
+AATTCAGAAAAAGAAATAAAGCACGTGTGCGCGGTGTGTGGATGATGGTTTCATCATTGT
+CAACGGCATTTTCGTTCTTGTGGATTGTTGTAAACTTTCCAGAACATTCTAGAAAGAAAG
+CACACGGAACGTTTAGAAGCTGTCATTTGCGTTTTTTCTCCAGATTTTAGTTGAGAAAGT
+AATTAAATTATTCTTCTTTTTCCAGAACGTTCCATCGGCGGCAAAAGGGAGAGAAAGAAC
+CCAAAAAGAAGGGGGGCCATTTAGATTAGCTGATCGTTTCGAGGACTTCAAGGTTATATA
+AGGGGTGGATTGATGTATCTTCGAGAAGGGATTGAGTTGTAGTTTCGTTTCCCAATTCTT
+ACTTAAGTTGTTTTATTTTCTCTATTTGTAAGATAAGCACATCAAAAGAAAAGTAATCAA
+GTATTACAAGAAACAAAAATTCAAGTAAATAACAGATAATATGTCAAAAGCTGTCGGTAT
+TGATTTAGGTACAACATACTCGTGTGTTGCTCACTTTGCTAATGATCGTGTGGACATTAT
+TGCCAACGATCAAGGTAACAGAACCACTCCATCTTTTGTCGCTTTCACTGACACTGAAAG
+ATTGATTGGTGATGCTGCTAAGAATCAAGCTGCTATGAATCCTTCGAATACCGTTTTCGA
+CGCTAAGCGTTTGATCGGTAGAAACTTCAACGACCCAGAAGTGCAGGCTGACATGAAGCA
+CTTCCCATTCAAGTTGATCGATGTTGACGGTAAGCCTCAAATTCAAGTTGAATTTAAGGG
+TGAAACCAAGAACTTTACCCCAGAACAAATCTCCTCCATGGTCTTGGGTAAGATGAAGGA
+AACTGCCGAATCTTACTTGGGAGCCAAGGTCAATGACGCTGTCGTCACTGTCCCAGCTTA
+CTTCAACGATTCTCAAAGACAAGCTACCAAGGATGCTGGTACCATTGCTGGTTTGAATGT
+CTTGCGTATTATTAACGAACCTACCGCCGCTGCCATTGCTTACGGTTTGGACAAGAAGGG
+TAAGGAAGAACACGTCTTGATTTTCGACTTGGGTGGTGGTACTTTCGATGTCTCTTTGTT
+GTTCATTGAAGACGGTATCTTTGAAGTTAAGGCCACCGCTGGTGACACCCATTTGGGTGG
+TGAAGATTTTGACAACAGATTGGTCAACCACTTCATCCAAGAATTCAAGAGAAAGAACAA
+GAAGGACTTGTCTACCAACCAAAGAGCTTTGAGAAGATTAAGAACCGCTTGTGAAAGAGC
+CAAGAGAACTTTGTCTTCCTCCGCTCAAACTTCCGTTGAAATTGACTCTTTGTTCGAAGG
+TATCGATTTCTACACTTCCATCACCAGAGCCAGATTCGAAGAATTGTGTGCTGACTTGTT
+CAGATCTACTTTGGACCCAGTTGAAAAGGTCTTGAGAGATGCTAAATTGGACAAATCTCA
+AGTCGATGAAATTGTCTTGGTCGGTGGTTCTACCAGAATTCCAAAGGTCCAAAAATTGGT
+CACTGACTACTTCAACGGTAAGGAACCAAACAGATCTATCAACCCAGATGAAGCTGTTGC
+TTACGGTGCTGCTGTTCAAGCTGCTATTTTGACTGGTGACGAATCTTCCAAGACTCAAGA
+TCTATTGTTGTTGGATGTCGCTCCATTATCCTTGGGTATTGAAACTGCTGGTGGTGTCAT
+GACCAAGTTGATTCCAAGAAACTCTACCATTTCAACAAAGAAGTTCGAGATCTTTTCCAC
+TTATGCTGATAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTGAAAGAGCCAAGAC
+TAAGGACAACAACTTGTTGGGTAAGTTCGAATTGAGTGGTATTCCACCAGCTCCAAGAGG
+TGTCCCACAAATTGAAGTCACTTTCGATGTCGACTCTAACGGTATTTTGAATGTTTCCGC
+CGTCGAAAAGGGTACTGGTAAGTCTAACAAGATCACTATTACCAACGACAAGGGTAGATT
+GTCCAAGGAAGATATCGAAAAGATGGTTGCTGAAGCCGAAAAATTCAAGGAAGAAGATGA
+AAAGGAATCTCAAAGAATTGCTTCCAAGAACCAATTGGAATCCATTGCTTACTCTTTGAA
+GAACACCATTTCTGAAGCTGGTGACAAATTGGAACAAGCTGACAAGGACACCGTCACCAA
+GAAGGCTGAAGAGACTATTTCTTGGTTAGACAGCAACACCACTGCCAGCAAGGAAGAATT
+CGATGACAAGTTGAAGGAGTTGCAAGACATTGCCAACCCAATCATGTCTAAGTTGTACCA
+AGCTGGTGGTGCTCCAGGTGGCGCTGCAGGTGGTGCTCCAGGCGGTTTCCCAGGTGGTGC
+TCCTCCAGCTCCAGAGGCTGAAGGTCCAACCGTTGAAGAAGTTGATTAAGCCAATTGGTG
+CGGCAATTGATAATAACGAAAATGTCTTTTAATGATCTGGGTATAATGAGGAATTTTCCG
+AACGTTTTTACTTTATATATATATATACATGTAACATATATTCTATACGCTATAGAGAAA
+GGAAATTTTTCAATTAAAAAAAAAATAGAGAAAGAGTTTCACTTCTTGATTATCGCTAAC
+ACTAATGGTTGAAGTACTGCTACTTTAATTTTATAGATAGGCAAAAAAAAATTATTCGGG
+GCGAGCTGGGAATTGAACCCAGGGCCTCTCGCATGCTTTGTCTTCCTGTTTAATCAGGAA
+GTCGCCCAAAGCGAGAATCATACCACTAGACCACACGCCCGTACTAATTGATGTCTTCCT
+TTTCGGATAGATGTATATATATACAAATTGGTCAGATTGCTTTTGGCTCCCTTTCGTACG
+TAACTCATTTAGACTACGGATCACTAGCACTATCTCACCAAGTTTTTAAAAGATCCACTG
+TGATCATTAAAGATTCTATTTCAAATAAAAATCAATTATCATCTATCGACTAGTTTTCAT
+GGTACTAGTATATTATCATGTACAGTGTGAGGGCTCGACATGAAGATTGAGAAGACAGTC
+ATCGTTTTCAATGGAAGCTGAAATACAAGCACTGAATAAAAAGAGATAACAATATTTTAA
+TGTCGTGATATTGGTTCCCTTCTGCTGGTCTTCGAGGAAAATTTCTGGTATGTTTTGTGT
+ATCTAATATTATATAGCCTTAAATCACTACCTATTTTTCAGATCCGCCAAGGAAGCCACT
+CTGTGAACTTTAAATAATATTACAAATTGCTACAGCATCTCGAATTCAAGTGTTTTCAGA
+CTTATTTATTTAATTCAAGAAAATGTTCATAAACTTTCATAAAGCAATCCTCAATCCTCT
+TAGCGCTAATGTTCACTGCACCTTACATGATGGCGTTTGGTTATATTTCAATCCTTTTCA
+GCGACGTGTCCCGAAACACGAAAAAGTAC
+>YAL007C ERP2 SGDID:S0000005, Chr I from 139347-136700, reverse complement, Verified ORF
+AGAGAAAGAGTTTCACTTCTTGATTATCGCTAACACTAATGGTTGAAGTACTGCTACTTT
+AATTTTATAGATAGGCAAAAAAAAATTATTCGGGGCGAGCTGGGAATTGAACCCAGGGCC
+TCTCGCATGCTTTGTCTTCCTGTTTAATCAGGAAGTCGCCCAAAGCGAGAATCATACCAC
+TAGACCACACGCCCGTACTAATTGATGTCTTCCTTTTCGGATAGATGTATATATATACAA
+ATTGGTCAGATTGCTTTTGGCTCCCTTTCGTACGTAACTCATTTAGACTACGGATCACTA
+GCACTATCTCACCAAGTTTTTAAAAGATCCACTGTGATCATTAAAGATTCTATTTCAAAT
+AAAAATCAATTATCATCTATCGACTAGTTTTCATGGTACTAGTATATTATCATGTACAGT
+GTGAGGGCTCGACATGAAGATTGAGAAGACAGTCATCGTTTTCAATGGAAGCTGAAATAC
+AAGCACTGAATAAAAAGAGATAACAATATTTTAATGTCGTGATATTGGTTCCCTTCTGCT
+GGTCTTCGAGGAAAATTTCTGGTATGTTTTGTGTATCTAATATTATATAGCCTTAAATCA
+CTACCTATTTTTCAGATCCGCCAAGGAAGCCACTCTGTGAACTTTAAATAATATTACAAA
+TTGCTACAGCATCTCGAATTCAAGTGTTTTCAGACTTATTTATTTAATTCAAGAAAATGT
+TCATAAACTTTCATAAAGCAATCCTCAATCCTCTTAGCGCTAATGTTCACTGCACCTTAC
+ATGATGGCGTTTGGTTATATTTCAATCCTTTTCAGCGACGTGTCCCGAAACACGAAAAAG
+TACATTCTGCCAAAACTTGAACTAGTTAAAATGAAAAACTAACAAAATCTCCAGTGATTT
+CGATTCATTAACCTTAGGGATCGACACCGCCAATTCTGCATCAGGTGACCCAAACTAGAA
+TAAGATAAACAGTGGAGTACACTCATTGTAAAGGAAAACCATGATCAAATCTACAATTGC
+TCTACCCTCTTTCTTCATTGTTTTAATTTTGGCGTTGGTCAATTCAGTGGCTGCATCTTC
+CAGTTATGCACCAGTTGCTATCAGTTTACCAGCATTCAGCAAAGAATGCCTGTACTACGA
+TATGGTTACTGAGGATGATTCCCTGGCTGTGGGTTACCAAGTTCTAACCGGTGGTAATTT
+TGAGATTGATTTTGATATTACTGCTCCTGATGGATCTGTGATTACTAGTGAGAAACAAAA
+GAAGTACTCAGACTTTTTATTAAAATCGTTTGGAGTGGGGAAGTATACCTTTTGTTTTTC
+CAATAACTATGGTACAGCGTTGAAAAAGGTAGAAATTACACTAGAAAAGGAAAAAACTTT
+GACTGACGAGCATGAAGCTGATGTCAATAACGATGACATTATTGCCAATAACGCCGTGGA
+GGAAATAGATAGAAACTTGAACAAAATCACGAAGACTTTGAACTATTTGAGAGCCAGAGA
+ATGGAGAAACATGTCTACCGTCAATTCCACCGAATCAAGGTTGACTTGGTTATCTATATT
+GATTATCATTATCATTGCCGTTATAAGTATTGCCCAAGTTCTACTTATTCAATTCCTCTT
+CACTGGTCGTCAAAAAAATTACGTTTAAGAACTTTTCAATCTACGAAAAATATATGTCCG
+CAATATAGAACACAATTAGGTTTATATTCGACGTGATTTTTTTTTCTTCCTTAGCCCTAT
+GTATATTTACTGTATAGGATAAATGAAATACCAAAAATAAAAAAGTATAAAACGAAAGAA
+TATAACCCTCGTTTATATCTGGTCATTTGTCTTGCTCATTTGTTAGCATTTAAACTTGCT
+AATACGAAACTCAATGTAAAGAATACTTTAAAACCCATGTATTCTGTACCCAATAACCCA
+TCAATAAGCAGTTTCTTCAAGTCTACATCTTTCAAAACAATTACAGATTTGATATTTTTC
+AAATTAATGCGAATCCATCCCTTGTACCGTAACCATTCACAAAGAAGCATGCTTGTAATA
+CCGACATACATAAACAAAATTGATATCTTAGCCACCGTAACTCCTAAAACAACACCGAAT
+AATGATCCCAAAAACATCTGCTTGTGACTACTTATTTTGTTCCTCCTTTCCTCGGTAATT
+GCCGTACTGTTGCCCACAGTTGGTTCCAAGGGACCCTGTTGTTTGACAGCTGCCCCTAAA
+GAATCATTGAATATCAACTTAGAAGGCTGGCGATTCATCAAGTAAAAGCCACCAGCAACT
+ATGCCAGCACCGCCTAACCCTACAGAGCGAGTTAAGGGCTTTCCTAATTTATTGGCGACA
+TTAAACCTCCATAAGGGGACGTTCTTGAACATGCGCTTCCCAACATTCAAATTACGAAAC
+ACCAACCGTTGCATATTAAAAGCCAAAGTCATTTTACACCTATTATTTTAAGTTCTTCCT
+TCTTGCCCCTCTAGCGTTATGCTGTTGTATGTATAATAAGATGCCTTGTTTAACCAATTT
+CAACAACCCCTAGGTTTTCTAGCACTTGTTGGAAAACTTAAATATGAAATGACGGATTTG
+TAAAATTTTCACGACACAACAGCTTCACTTCTAATAATGTATAAAATATAATTTATGTGT
+GAACATAG
+>YAL008W FUN14 SGDID:S0000006, Chr I from 135916-138512, Verified ORF
+GTGTCCGGGCCTCGCAGGCGTTCTACTAGCAAGACATCCAGTGCGAAGAATATACGGAAC
+TCCAGTAATATCTCTCCAGCATCGATGATTTTCAGGAATTTGTTGATACTGGAGGATGAT
+TTAAGACGCCAAGCTCACGAACAAAAGATACTGAAGTGGCAATTCACTTTGTTCTTAGCG
+TCTATGGCCGGTGTAGGCGCATTTACCTTCTACGAACTTTATTTCACTTCAGATTATGTC
+AAGGGCCTCCATAGGGTTATTTTGCAATTCACTCTTTCTTTCATTTCCATTACTGTAGTT
+CTTTTTCATATCAGTGGACAATATAGAAGAACTATCGTCATTCCAAGAAGATTTTTTACC
+TCTACTAATAAAGGGATTAGGCAGTTTAATGTGAAGCTAGTTAAAGTACAGTCTACGTGG
+GACGAGAAATACACAGATTCAGTAAGATTTGTGAGTCGAACAATTGCTTATTGTAATATT
+TATTGTTTGAAAAAATTTCTGTGGCTTAAAGACGATAATGCCATTGTGAAATTTTGGAAA
+AGTGTCACGATACAATCCCAACCGAGGATCGGAGCTGTGGATGTGAAATTAGTCCTCAAC
+CCCAGAGCATTTAGTGCAGAGATTAGAGAAGGATGGGAGATTTATAGAGACGAGTTTTGG
+GCCAGGGAAGGTGCTAGAAGACGCAAACAAGCGCACGAACTCCGACCTAAATCAGAATGA
+AAGAGTTGGAGGGCTTCTTCCTTCGAATAAGAGGTCATATTTACCTATGTAAAATTGTAA
+CCATCTATGTTCACACATAAATTATATTTTATACATTATTAGAAGTGAAGCTGTTGTGTC
+GTGAAAATTTTACAAATCCGTCATTTCATATTTAAGTTTTCCAACAAGTGCTAGAAAACC
+TAGGGGTTGTTGAAATTGGTTAAACAAGGCATCTTATTATACATACAACAGCATAACGCT
+AGAGGGGCAAGAAGGAAGAACTTAAAATAATAGGTGTAAAATGACTTTGGCTTTTAATAT
+GCAACGGTTGGTGTTTCGTAATTTGAATGTTGGGAAGCGCATGTTCAAGAACGTCCCCTT
+ATGGAGGTTTAATGTCGCCAATAAATTAGGAAAGCCCTTAACTCGCTCTGTAGGGTTAGG
+CGGTGCTGGCATAGTTGCTGGTGGCTTTTACTTGATGAATCGCCAGCCTTCTAAGTTGAT
+ATTCAATGATTCTTTAGGGGCAGCTGTCAAACAACAGGGTCCCTTGGAACCAACTGTGGG
+CAACAGTACGGCAATTACCGAGGAAAGGAGGAACAAAATAAGTAGTCACAAGCAGATGTT
+TTTGGGATCATTATTCGGTGTTGTTTTAGGAGTTACGGTGGCTAAGATATCAATTTTGTT
+TATGTATGTCGGTATTACAAGCATGCTTCTTTGTGAATGGTTACGGTACAAGGGATGGAT
+TCGCATTAATTTGAAAAATATCAAATCTGTAATTGTTTTGAAAGATGTAGACTTGAAGAA
+ACTGCTTATTGATGGGTTATTGGGTACAGAATACATGGGTTTTAAAGTATTCTTTACATT
+GAGTTTCGTATTAGCAAGTTTAAATGCTAACAAATGAGCAAGACAAATGACCAGATATAA
+ACGAGGGTTATATTCTTTCGTTTTATACTTTTTTATTTTTGGTATTTCATTTATCCTATA
+CAGTAAATATACATAGGGCTAAGGAAGAAAAAAAAATCACGTCGAATATAAACCTAATTG
+TGTTCTATATTGCGGACATATATTTTTCGTAGATTGAAAAGTTCTTAAACGTAATTTTTT
+TGACGACCAGTGAAGAGGAATTGAATAAGTAGAACTTGGGCAATACTTATAACGGCAATG
+ATAATGATAATCAATATAGATAACCAAGTCAACCTTGATTCGGTGGAATTGACGGTAGAC
+ATGTTTCTCCATTCTCTGGCTCTCAAATAGTTCAAAGTCTTCGTGATTTTGTTCAAGTTT
+CTATCTATTTCCTCCACGGCGTTATTGGCAATAATGTCATCGTTATTGACATCAGCTTCA
+TGCTCGTCAGTCAAAGTTTTTTCCTTTTCTAGTGTAATTTCTACCTTTTTCAACGCTGTA
+CCATAGTTATTGGAAAAACAAAAGGTATACTTCCCCACTCCAAACGATTTTAATAAAAAG
+TCTGAGTACTTCTTTTGTTTCTCACTAGTAATCACAGATCCATCAGGAGCAGTAATATCA
+AAATCAATCTCAAAATTACCACCGGTTAGAACTTGGTAACCCACAGCCAGGGAATCATCC
+TCAGTAACCATATCGTAGTACAGGCATTCTTTGCTGAATGCTGGTAAACTGATAGCAACT
+GGTGCATAACTGGAAGATGCAGCCACTGAATTGACCAACGCCAAAATTAAAACAATGAAG
+AAAGAGGGTAGAGCAATTGTAGATTTGATCATGGTTTTCCTTTACAATGAGTGTACTCCA
+CTGTTTATCTTATTCTAGTTTGGGTCACCTGATGCAGAATTGGCGGTGTCGATCCCTAAG
+GTTAATGAATCGAAATCACTGGAGATTTTGTTAGTTTTTCATTTTAACTAGTTCAAGTTT
+TGGCAGAATGTACTTTT
+>YAL009W SPO7 SGDID:S0000007, Chr I from 134856-137635, Verified ORF
+CAAGATAATGTCAAAGTTAGTGGTCGTCCTGTGTTTGTAGAATGTGTGTAATATCTTAAA
+GTTGTCGAACAACCGGGGCTTAAACTTACTAACCCTAACCAAAATTCAGCACCAAGCGAC
+AACGAAGAATTATTGTACAGTGAGGTGTTAAACTTGGAAGGCGTGGTAAGGAAATTGTGT
+AATACTCTATAACCACATAATAGATCACTGGTGGAAAATATCCACTCCTGTAAATTGCGG
+TGAGAATCTCTTTGAAAATAGCACGTTAAAACGTTTAAGCTTTCTTTGAAACTACTGACA
+CCCTTAAGCATAAATTGGGTTTGTGGACTTAGTCGTTTTATTATCATTGCTTCTAAATCA
+GAGCTGGGGTAGTACATTCTACCATAATAAAGGGATTTTTTAACAAATTTCGAGTCATGT
+AGTAATTTCTTGTCATTGTCGACTGTGGTGTTGTCACTACTCAACGTATTCGCACTACTA
+ACACTGAAATTGAGGTTTGGTTGCAATTGTCTGTATGTTTCGGTGGCATCTTGTAATGGG
+ATATCAGTAGAGTTGCGCATGAATTTCTCCAATTGCTGTGCATCGGAGTATAAATAACTC
+AGAGAACCATTTATCCTGGACCTCGTAGAAAAATCTAAAGAATTGAATGTATTGGGAGTA
+GATTTGTTGGAAATTTGCAGGTGTATTGCTGAGGGAATTCGGAAATCTAATAATGTTCTC
+GATGTGGCCGTTATATCCTCGTAGCTATTTTGCGTACTCCAATGGGTGCTCTGATAAAAT
+GCCCTTAGTACTTGGTCCATATAGGGTAGCATCAAGATCGGTCTTCTCTGTTCGTGTCTT
+TTTCCTAACGTATATTTGCTTTGTTTCTTCACTCAACAATAAAGTCAAAGTAAAATTAAA
+TACTAATTATTCTTAAAAGGGAAGATGCGAAATTTAGCGAAAATCTATTGATTATACACA
+CAAAGGAAGAAAGGTAGTGGAAAGCTAAATAAAGGAGGTCATGGAGCCAGAGAGCATAGG
+CGATGTGGGGAACCATGCCCAGGATGATAGTGCCAGTATAGTGTCCGGGCCTCGCAGGCG
+TTCTACTAGCAAGACATCCAGTGCGAAGAATATACGGAACTCCAGTAATATCTCTCCAGC
+ATCGATGATTTTCAGGAATTTGTTGATACTGGAGGATGATTTAAGACGCCAAGCTCACGA
+ACAAAAGATACTGAAGTGGCAATTCACTTTGTTCTTAGCGTCTATGGCCGGTGTAGGCGC
+ATTTACCTTCTACGAACTTTATTTCACTTCAGATTATGTCAAGGGCCTCCATAGGGTTAT
+TTTGCAATTCACTCTTTCTTTCATTTCCATTACTGTAGTTCTTTTTCATATCAGTGGACA
+ATATAGAAGAACTATCGTCATTCCAAGAAGATTTTTTACCTCTACTAATAAAGGGATTAG
+GCAGTTTAATGTGAAGCTAGTTAAAGTACAGTCTACGTGGGACGAGAAATACACAGATTC
+AGTAAGATTTGTGAGTCGAACAATTGCTTATTGTAATATTTATTGTTTGAAAAAATTTCT
+GTGGCTTAAAGACGATAATGCCATTGTGAAATTTTGGAAAAGTGTCACGATACAATCCCA
+ACCGAGGATCGGAGCTGTGGATGTGAAATTAGTCCTCAACCCCAGAGCATTTAGTGCAGA
+GATTAGAGAAGGATGGGAGATTTATAGAGACGAGTTTTGGGCCAGGGAAGGTGCTAGAAG
+ACGCAAACAAGCGCACGAACTCCGACCTAAATCAGAATGAAAGAGTTGGAGGGCTTCTTC
+CTTCGAATAAGAGGTCATATTTACCTATGTAAAATTGTAACCATCTATGTTCACACATAA
+ATTATATTTTATACATTATTAGAAGTGAAGCTGTTGTGTCGTGAAAATTTTACAAATCCG
+TCATTTCATATTTAAGTTTTCCAACAAGTGCTAGAAAACCTAGGGGTTGTTGAAATTGGT
+TAAACAAGGCATCTTATTATACATACAACAGCATAACGCTAGAGGGGCAAGAAGGAAGAA
+CTTAAAATAATAGGTGTAAAATGACTTTGGCTTTTAATATGCAACGGTTGGTGTTTCGTA
+ATTTGAATGTTGGGAAGCGCATGTTCAAGAACGTCCCCTTATGGAGGTTTAATGTCGCCA
+ATAAATTAGGAAAGCCCTTAACTCGCTCTGTAGGGTTAGGCGGTGCTGGCATAGTTGCTG
+GTGGCTTTTACTTGATGAATCGCCAGCCTTCTAAGTTGATATTCAATGATTCTTTAGGGG
+CAGCTGTCAAACAACAGGGTCCCTTGGAACCAACTGTGGGCAACAGTACGGCAATTACCG
+AGGAAAGGAGGAACAAAATAAGTAGTCACAAGCAGATGTTTTTGGGATCATTATTCGGTG
+TTGTTTTAGGAGTTACGGTGGCTAAGATATCAATTTTGTTTATGTATGTCGGTATTACAA
+GCATGCTTCTTTGTGAATGGTTACGGTACAAGGGATGGATTCGCATTAATTTGAAAAATA
+TCAAATCTGTAATTGTTTTGAAAGATGTAGACTTGAAGAAACTGCTTATTGATGGGTTAT
+TGGGTACAGAATACATGGGTTTTAAAGTATTCTTTACATTGAGTTTCGTATTAGCAAGTT
+TAAATGCTAACAAATGAGCAAGACAAATGACCAGATATAAACGAGGGTTATATTCTTTCG
+TTTTATACTTTTTTATTTTTGGTATTTCATTTATCCTATACAGTAAATATACATAGGGCT
+AAGGAAGAAAAAAAAATCAC
diff --git a/inst/sequences/test.aln b/inst/sequences/test.aln
new file mode 100644
index 0000000..1bb5a5e
--- /dev/null
+++ b/inst/sequences/test.aln
@@ -0,0 +1,27 @@
+CLUSTAL W (1.82) multiple sequence alignment
+
+
+FOSB_MOUSE MFQAFPGDYDSGSRCSSSPSAESQYLSSVDSFGSPPTAAASQECAGLGEMPGSFVPTVTA 60
+FOSB_HUMAN MFQAFPGDYDSGSRCSSSPSAESQYLSSVDSFGSPPTAAASQECAGLGEMPGSFVPTVTA 60
+ ************************************************************
+
+FOSB_MOUSE ITTSQDLQWLVQPTLISSMAQSQGQPLASQPPAVDPYDMPGTSYSTPGLSAYSTGGASGS 120
+FOSB_HUMAN ITTSQDLQWLVQPTLISSMAQSQGQPLASQPPVVDPYDMPGTSYSTPGMSGYSSGGASGS 120
+ ********************************.***************:*.**:******
+
+FOSB_MOUSE GGPSTSTTTSGPVSARPARARPRRPREETLTPEEEEKRRVRRERNKLAAAKCRNRRRELT 180
+FOSB_HUMAN GGPSTSGTTSGPGPARPARARPRRPREETLTPEEEEKRRVRRERNKLAAAKCRNRRRELT 180
+ ****** ***** .**********************************************
+
+FOSB_MOUSE DRLQAETDQLEEEKAELESEIAELQKEKERLEFVLVAHKPGCKIPYEEGPGPGPLAEVRD 240
+FOSB_HUMAN DRLQAETDQLEEEKAELESEIAELQKEKERLEFVLVAHKPGCKIPYEEGPGPGPLAEVRD 240
+ ************************************************************
+
+FOSB_MOUSE LPGSTSAKEDGFGWLLPPPPPPPLPFQSSRDAPPNLTASLFTHSEVQVLGDPFPVVSPSY 300
+FOSB_HUMAN LPGSAPAKEDGFSWLLPPPPPPPLPFQTSQDAPPNLTASLFTHSEVQVLGDPFPVVNPSY 300
+ ****:.******.**************:*:**************************.***
+
+FOSB_MOUSE TSSFVLTCPEVSAFAGAQRTSGSEQPSDPLNSPSLLAL 338
+FOSB_HUMAN TSSFVLTCPEVSAFAGAQRTSGSDQPSDPLNSPSLLAL 338
+ ***********************:**************
+
diff --git a/inst/sequences/test.mase b/inst/sequences/test.mase
new file mode 100644
index 0000000..aedbdb5
--- /dev/null
+++ b/inst/sequences/test.mase
@@ -0,0 +1,19 @@
+;;Aligned by clustal on Tue Jun 30 17:36:11 1998
+;empty description
+Langur
+-KIFERCELARTLKKLGLDGYKGVSLANWVCLAKWESGYNTEATNYNPGDESTDYGIFQINSRYWCNNGKPGAVDACHISCSALLQNNIADAVACAKRVVSDQGIRAWVAWRNHCQNKDVSQYVKGCGV-
+;
+Baboon
+-KIFERCELARTLKRLGLDGYRGISLANWVCLAKWESDYNTQATNYNPGDQSTDYGIFQINSHYWCNDGKPGAVNACHISCNALLQDNITDAVACAKRVVSDQGIRAWVAWRNHCQNRDVSQYVQGCGV-
+;
+Human
+-KVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKPGAVNACHLSCSALLQDNIADAVACAKRVVRDQGIRAWVAWRNRCQNRDVRQYVQGCGV-
+;
+Rat
+-KTYERCEFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQARNYDPGDQSTDYGIFQINSRYWCNDGKPRAKNACGIPCSALLQDDITQAIQCAKRVVRDQGIRAWVAWQRHCKNRDLSGYIRNCGV-
+;
+Cow
+-KVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKATNYNPSSESTDYGIFQINSKWWCNDGKPNAVDGCHVSCSELMENDIAKAVACAKKIVSEQGITAWVAWKSHCRDHDVSSYVEGCTL-
+;
+Horse
+-KVFSKCELAHKLKAQEMDGFGGYSLANWVCMAEYESNFNTRAFNGKNANGSSDYGLFQLNNKWWCKDNKRSSSNACNIMCSKLLDENIDDDISCAKRVVRDKGMSAWKAWVKHCKDKDLSEYLASCNL-
diff --git a/inst/sequences/test.msf b/inst/sequences/test.msf
new file mode 100644
index 0000000..8de8b20
--- /dev/null
+++ b/inst/sequences/test.msf
@@ -0,0 +1,57 @@
+PileUp of: @Pi3k.Fil
+
+Symbol comparison table: GenRunData:Pileuppep.Cmp CompCheck: 1254
+
+ GapWeight: 3.000
+ GapLengthWeight: 0.100
+
+ Pi3k.Msf MSF: 377 Type: P July 12, 1996 10:40 Check: 167 ..
+
+ Name: Tor1_Yeast Len: 377 Check: 7773 Weight: 1.00
+ Name: Tor2_Yeast Len: 377 Check: 8562 Weight: 1.00
+ Name: Frap_Human Len: 377 Check: 9129 Weight: 1.00
+ Name: Esr1_Yeast Len: 377 Check: 8114 Weight: 1.00
+ Name: Tel1_Yeast Len: 377 Check: 1564 Weight: 1.00
+ Name: Pi4k_Human Len: 377 Check: 8252 Weight: 1.00
+ Name: Stt4_Yeast Len: 377 Check: 9117 Weight: 1.00
+ Name: Pik1_Yeast Len: 377 Check: 3455 Weight: 1.00
+ Name: P3k1_Soybn Len: 377 Check: 4973 Weight: 1.00
+ Name: P3k2_Soybn Len: 377 Check: 4632 Weight: 1.00
+ Name: Pi3k_Arath Len: 377 Check: 3585 Weight: 1.00
+ Name: Vp34_Yeast Len: 377 Check: 5928 Weight: 1.00
+ Name: P11a_Human Len: 377 Check: 6597 Weight: 1.00
+ Name: P11b_Human Len: 377 Check: 8486 Weight: 1.00
+
+//
+
+ 1 50
+Tor1_Yeast .......GHE DIRQDSLVMQ LFGLVNTLLK NDSECFKRHL DIQQYPAIPL
+Tor2_Yeast .......GHE DIRQDSLVMQ LFGLVNTLLQ NDAECFRRHL DIQQYPAIPL
+Frap_Human .......GHE DLRQDERVMQ LFGLVNTLLA NDPTSLRKNL SIQRYAVIPL
+Esr1_Yeast .......KKE DVRQDNQYMQ FATTMDFLLS KDIASRKRSL GINIYSVLSL
+Tel1_Yeast .KALMKGSND DLRQDAIMEQ VFQQVNKVLQ NDKVLRNLDL GIRTYKVVPL
+Pi4k_Human ..AAIFKVGD DCRQDMLALQ IIDLFKNIFQ LV....GLDL FVFPYRVVAT
+Stt4_Yeast ..AAIFKVGD DCRQDVLALQ LISLFRTIWS SI....GLDV YVFPYRVTAT
+Pik1_Yeast ...VIAKTGD DLRQEAFAYQ MIQAMANIWV KE....KVDV WVKRMKILIT
+P3k1_Soybn TCKIIFKKGD DLRQDQLVVQ MVSLMDRLLK LE....NLDL HLTPYKVLAT
+P3k2_Soybn ....IFKKGD DIRQDQLVVQ MVSLMDRLLK LE....NLDL HLTPYKVLAT
+Pi3k_Arath ..KLIFKKGD DLRQDQLVVQ MVWLMDRLLK LE....NLDL CLTPYKVLAT
+Vp34_Yeast .YHLMFKVGD DLRQDQLVVQ IISLMNELLK NE....NVDL KLTPYKILAT
+P11a_Human ...IIFKNGD DLRQDMLTLQ IIRIMENIWQ NQ....GLDL RMLPYGCLSI
+P11b_Human ...VIFKNGD DLRQDMLTLQ MLRLMDLLWK EA....GLDL RMLPYGCLAT
+
+ 51 100
+Tor1_Yeast SPKSGLLGWV PNSDTFHVLI REHRDAKKIP LNIEHWVMLQ MAPDYENLTL
+Tor2_Yeast SPKSGLLGWV PNSDTFHVLI REHREAKKIP LNIEHWVMLQ MAPDYDNLTL
+Frap_Human STNSGLIGWV PHCDTLHALI RDYREKKKIL LNIEHRIMLR MAPDYDHLTL
+Esr1_Yeast REDCGILEMV PNVVTLRSIL STKYESLKIK Y.....SLKS LHDRWQHTAV
+Tel1_Yeast GPKAGIIEFV ANSTSLHQIL SKLHTNDKIT FDQARKGMKA VQTKSN....
+Pi4k_Human APGCGVIECI PDCTS..... .......... RDQLGRQTDF GMYDYFTRQY
+Stt4_Yeast APGCGVIDVL PNSVS..... .......... RDMLGREAVN GLYEYFTSKF
+Pik1_Yeast SANTGLVETI TNAMSVHSIK KALTKKMIED AELDDKGGIA SLNDHFLRAF
+P3k1_Soybn GQDEGMLEFI P.SRSLAQI. .......... ..LSENRSII SYLQ......
+P3k2_Soybn GQDEGMLEFI P.SRSLAQI. .......... ..LSENRSII SYLQ......
+Pi3k_Arath GHDEGMLEFI P.SRSLAQI. .......... ..LSEHRSIT SYLQ......
+Vp34_Yeast GPQEGAIEFI P.NDTLASI. .......... ..LSKYHGIL GYLK......
+P11a_Human GDCVGLIEVV RNSHTIMQI. .......... ..Q.CKGGLK GALQFNSHTL
+P11b_Human GDRSGLIEVV STSETIADI. .......... ..QLNSSNVA AAAAFNKDAL
diff --git a/inst/sequences/test.phylip b/inst/sequences/test.phylip
new file mode 100644
index 0000000..1bc5dad
--- /dev/null
+++ b/inst/sequences/test.phylip
@@ -0,0 +1,12 @@
+ 5 42
+Turkey AAGCTNGGGC ATTTCAGGGT
+Salmo gairAAGCCTTGGC AGTGCAGGGT
+H. SapiensACCGGTTGGC CGTTCAGGGT
+Chimp AAACCCTTGC CGTTACGCTT
+Gorilla AAACCCTTGC CGGTACGCTT
+
+GAGCCCGGGC AATACAGGGT AT
+GAGCCGTGGC CGGGCACGGT AT
+ACAGGTTGGC CGTTCAGGGT AA
+AAACCGAGGC CGGGACACTC AT
+AAACCATTGC CGGTACGCTT AA
diff --git a/man/AAstat.Rd b/man/AAstat.Rd
new file mode 100644
index 0000000..49ab9ca
--- /dev/null
+++ b/man/AAstat.Rd
@@ -0,0 +1,33 @@
+\name{AAstat}
+\alias{AAstat}
+\title{ To Get Some Protein Statistics }
+\description{
+ Returns simple protein sequence information including the number of residues,
+ the percentage of physico-chemical classes and the theoretical isoelectric point.
+}
+\usage{
+AAstat(seq, plot = TRUE)
+}
+\arguments{
+ \item{seq}{ a protein sequence as a vector of upper-case chars }
+ \item{plot}{ if \code{TRUE}, plots the presence of residues split by
+ physico-chemical classes along the sequence. }
+}
+\value{
+ A list with the three following components:
+ \item{Compo}{A factor giving the amino acid counts.}
+ \item{Prop}{A list giving the percentage of each physico-chemical classes
+ (Tiny, Small, Aliphatic, Aromatic, Non-polar, Polar, Charged, Positive, Negative).}
+ \item{Pi}{The theoretical isoelectric point}
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{ \code{\link{computePI}}, \code{\link{SEQINR.UTIL}}, \code{\link{SeqFastaAA}}}
+\examples{
+ seqAA <- read.fasta(file = system.file("sequences/seqAA.fasta", package = "seqinr"),
+ seqtype = "AA")
+ AAstat(seqAA[[1]])
+}
+\keyword{utilities}
diff --git a/man/AnoukResult.Rd b/man/AnoukResult.Rd
new file mode 100644
index 0000000..f5cc395
--- /dev/null
+++ b/man/AnoukResult.Rd
@@ -0,0 +1,29 @@
+\name{AnoukResult}
+\alias{AnoukResult}
+\docType{data}
+\title{Expected numeric results for Ka and Ks computation}
+\description{
+This data set is what should be obtained when running \code{kaks()}
+on the test file Anouk.fasta in the sequences directory of the
+seqinR package.
+}
+\usage{data(AnoukResult)}
+\format{
+ A list with 4 components of class dist.
+ \describe{
+ \item{ka}{Ka}
+ \item{ks}{Ks}
+ \item{vka}{variance for Ka}
+ \item{vks}{variance for Ks}
+ }
+}
+\details{
+See the example in \code{\link{kaks}}.
+}
+\source{
+The fasta test file was provided by Anamaria Necşulea.
+}
+\references{
+\code{citation("seqinr")}
+}
+\keyword{datasets}
diff --git a/man/ECH.Rd b/man/ECH.Rd
new file mode 100644
index 0000000..a32a889
--- /dev/null
+++ b/man/ECH.Rd
@@ -0,0 +1,37 @@
+\name{ECH}
+\alias{ECH}
+\docType{data}
+\title{Forensic Genetic Profile Allelic Ladder Raw Data}
+\description{
+ This is an example of allelic ladder raw data for a human STR genetic profile at 16 loci
+ (\emph{viz.} D8S1179, D21S11, D7S820, CSF1PO, D3S1358, TH01, D13S317, D16S539,
+ D2S1338, D19S433, vWA, TPOX, D18S51, Amelogenin, D5S818, FGA) which
+ are commonly used in forensic sciences for individual identifications.
+}
+\usage{data(ECH)}
+\format{
+ A list with 3 components as in \code{\link{JLO}}
+}
+\source{
+ Data were kindly provided by the INPS (Institut National de Police Scientifique)
+ which is the national forensic sciences institute in France. Experiments
+ were done at the LPS (Laboratoire de Police Scientifique de Lyon) in 2008.
+}
+\references{
+ \code{citation("seqinr")}
+
+ Anonymous (2006) Applied Biosystem Genetic Analysis Data File Format.
+ Available at \url{http://www.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf}.
+ Last visited on 03-NOV-2008.
+}
+\author{J.R. Lobry}
+\seealso{
+ function \code{\link{read.abif}} to import files in ABIF format,
+ data \code{\link{gs500liz}} for internal size standards,
+ data \code{\link{identifiler}} for allele names in the allelic ladder,
+ data \code{\link{JLO}} for an example of an individual sample file.
+}
+\examples{
+data(JLO)
+}
+
diff --git a/man/EXP.Rd b/man/EXP.Rd
new file mode 100644
index 0000000..18f8de7
--- /dev/null
+++ b/man/EXP.Rd
@@ -0,0 +1,99 @@
+\name{EXP}
+\alias{EXP}
+\docType{data}
+\title{Vectors of coefficients to compute linear forms.}
+\description{
+This dataset is used to compute linear forms on codon frequencies:
+if \code{freq} is a vector of codon frequencies then
+\code{drop(freq \%*\% EXP$CG3)} will return for instance the G+C content
+in third codon positions. Base order is the lexical order: a,
+c, g, t (or u).
+}
+\usage{data(EXP)}
+\format{List of 24 vectors of coefficients
+\describe{
+ \item{A}{num [1:4] 1 0 0 0}
+ \item{A3}{num [1:64] 1 0 0 0 1 0 0 0 1 0 ...}
+ \item{AGZ}{num [1:64] 0 0 0 0 0 0 0 0 1 0 ...}
+ \item{ARG}{num [1:64] 0 0 0 0 0 0 0 0 1 0 ...}
+ \item{AU3}{num [1:64] 1 0 0 1 1 0 0 1 1 0 ...}
+ \item{BC}{num [1:64] 0 1 0 0 0 0 0 0 0 0 ...}
+ \item{C}{num [1:4] 0 1 0 0}
+ \item{C3}{num [1:64] 0 1 0 0 0 1 0 0 0 1 ...}
+ \item{CAI}{num [1:64] 0.00 0.00 -1.37 -2.98 -2.58 ...}
+ \item{CG}{num [1:4] 0 1 1 0}
+ \item{CG1}{num [1:64] 0 0 0 0 0 0 0 0 0 0 ...}
+ \item{CG12}{num [1:64] 0 0 0 0 0.5 0.5 0.5 0.5 0.5 0.5 ...}
+ \item{CG2}{num [1:64] 0 0 0 0 1 1 1 1 1 1 ...}
+ \item{CG3}{num [1:64] 0 1 1 0 0 1 1 0 0 1 ...}
+ \item{CGN}{num [1:64] 0 0 0 0 0 0 0 0 0 0 ...}
+ \item{F1}{num [1:64] 1.026 0.239 1.026 0.239 -0.097 ...}
+ \item{G}{num [1:4] 0 0 1 0}
+ \item{G3}{num [1:64] 0 0 1 0 0 0 1 0 0 0 ...}
+ \item{KD}{num [1:64] -3.9 -3.5 -3.9 -3.5 -0.7 -0.7 -0.7 -0.7 -4.5 -0.8 ...}
+ \item{Q}{num [1:64] 0 0 0 0 1 1 1 1 0 0 ...}
+ \item{QA3}{num [1:64] 0 0 0 0 1 0 0 0 0 0 ...}
+ \item{QC3}{num [1:64] 0 0 0 0 0 1 0 0 0 0 ...}
+ \item{U}{num [1:4] 0 0 0 1}
+ \item{U3}{num [1:64] 0 0 0 1 0 0 0 1 0 0 ...}
+}
+}
+\details{
+It's better to work directly at the amino-acid level
+when computing linear forms on amino-acid frequencies so as to have
+a single coefficient vector. For instance \code{EXP$KD} to compute the Kyte
+and Doolittle hydropathy index from codon frequencies is valid only
+for the standard genetic code.\cr
+\cr
+An alternative for \code{drop(freq \%*\% EXP$CG3)} is \code{
+sum( freq * EXP$CG3 )}, but this is less efficient in terms of CPU
+time. The advantage of the latter, however, is that thanks to
+recycling rules you can use either \code{sum( freq * EXP$A )}
+or \code{sum( freq * EXP$A3 )}. To do the same with the \%*\%
+operator you have to make the recycling rule explicit as in \code{
+drop( freq \%*\% rep(EXP$A, 16))}.
+}
+\source{
+ANALSEQ EXPFILEs for command EXP.\cr
+\url{http://pbil.univ-lyon1.fr/software/doclogi/docanals/manuel.html}
+}
+\references{
+ \code{citation("seqinr")}
+ \describe{
+ \item{A}{content in A nucleotide}
+ \item{A3}{content in A nucleotide in third position of codon}
+ \item{AGZ}{Arg content (aga and agg codons)}
+ \item{ARG}{Arg content}
+ \item{AU3}{content in A and U nucleotides in third position of codon}
+ \item{BC}{Good choice (Bon choix). Gouy M., Gautier C. (1982)
+ codon usage in bacteria : Correlation with gene expressivity. \emph{Nucleic Acids Research},\bold{10(22)}:7055-7074.}
+ \item{C}{content in C nucleotides}
+ \item{C3}{content in C nucleotides in third position of codon}
+ \item{CAI}{Codon adaptation index for E. coli. Sharp, P.M., Li, W.-H. (1987) The codon adaptation index -
+a measure of directional synonymous codon usage bias, and its potential
+applications. \emph{Nucleic Acids Research},\bold{15}:1281-1295.}
+ \item{CG}{content in G + C nucleotides}
+ \item{CG1}{content in G + C nucleotides in first position of codon}
+ \item{CG12}{content in G + C nucleotides in first and second position of codon}
+ \item{CG2}{content in G + C nucleotides in second position of codon}
+ \item{CG3}{content in G + C nucleotides in third position of codon}
+ \item{CGN}{content in CGA + CGU + CGC + CGG}
+ \item{F1}{From Table 2 in Lobry, J.R., Gautier, C. (1994) Hydrophobicity,
+expressivity and aromaticity are the major trends of amino-acid usage in
+999 \emph{Escherichia coli} chromosome-encoded genes. \emph{Nucleic Acids
+Research},\bold{22}:3174-3180.}
+ \item{G3}{content in G nucleotides in third position of codon}
+ \item{KD}{Kyte, J., Doolittle, R.F. (1982) A simple method for displaying
+the hydropathic character of a protein. \emph{J. Mol. Biol.},\bold{157}
+:105-132.}
+ \item{Q}{content in quartet}
+ \item{QA3}{content in quartet with the A nucleotide in third position}
+ \item{QC3}{content in quartet with the C nucleotide in third position}
+ \item{U}{content in U nucleotide}
+ \item{U3}{content in U nucleotides in third position of codon}
+}
+}
+\examples{
+data(EXP)
+}
+\keyword{datasets}
diff --git a/man/GC.Rd b/man/GC.Rd
new file mode 100644
index 0000000..01e3042
--- /dev/null
+++ b/man/GC.Rd
@@ -0,0 +1,202 @@
+\name{G+C Content}
+\alias{GC}
+\alias{GC1}
+\alias{GC2}
+\alias{GC3}
+\alias{GCpos}
+\title{Calculates the fractional G+C content of nucleic acid sequences.}
+\description{
+ Calculates the fraction of G+C bases of the input nucleic acid
+ sequence(s). It reads in nucleic acid sequences, sums the number of
+ 'g' and 'c' bases and writes out the result as the fraction (in the
+ interval 0.0 to 1.0) to the total number of 'a', 'c', 'g' and 't' bases.
+ Global G+C content \code{GC}, G+C in the first position of the codon bases
+ \code{GC1}, G+C in the second position of the codon bases
+ \code{GC2}, and G+C in the third position of the codon bases
+ \code{GC3} can be computed. All functions can take ambiguous bases
+ into account when requested.
+}
+\usage{
+GC(seq, forceToLower = TRUE, exact = FALSE, NA.GC = NA, oldGC = FALSE)
+GC1(seq, frame = 0, ...)
+GC2(seq, frame = 0, ...)
+GC3(seq, frame = 0, ...)
+GCpos(seq, pos, frame = 0, ...)
+}
+\arguments{
+ \item{seq}{a nucleic acid sequence as a vector of single characters}
+ \item{frame}{for coding sequences, an integer (0, 1, 2) giving the frame}
+ \item{forceToLower}{logical. if \code{TRUE} force sequence
+ characters in lower-case. Turn this to \code{FALSE} to save time
+ if your sequence is already in lower-case (cpu time is approximately
+ divided by 3 when turned off)}
+ \item{exact}{logical: if \code{TRUE} ambiguous bases are taken
+ into account when computing the G+C content (see details).
+ Turn this to \code{FALSE} to save time if you can neglect
+ ambiguous bases in your sequence (cpu time is approximately
+ divided by 3 when turned off)
+}
+ \item{NA.GC}{what should be returned when the GC is impossible to
+ compute from data, for instance with NNNNNNN. This behaviour could
+ be different when argument \code{exact} is \code{TRUE}, for instance
+ the G+C content of WWSS is \code{NA} by default, but is 0.5 when
+ \code{exact} is set to \code{TRUE}}
+ \item{...}{arguments passed to the function \code{GC}}
+ \item{pos}{for coding sequences, the codon position (1, 2, 3) that should be
+ taken into account to compute the G+C content}
+ \item{oldGC}{logical defaulting to \code{FALSE}: should the GC content be computed
+ as in seqinR <= 1.0-6, that is as the sum of 'g' and 'c' bases divided by
+ the length of the sequence. As from seqinR >= 1.1-3, this argument is
+ deprecated and a warning is issued.}
+}
+\details{
+ When \code{exact} is set to \code{TRUE} the G+C content is estimated
+ with ambiguous bases taken into account. Note that this is time expensive.
+ A first pass is made on non-ambiguous bases to estimate the probabilities
+ of the four bases in the sequence. They are then used to weight the
+ contributions of ambiguous bases to the G+C content. Let nx denote
+ the total number of bases 'x' in the sequence. For instance
+ suppose that there are nb bases 'b'. 'b' stands for "not a", that
+ is for 'c', 'g' or 't'. The contribution of 'b' bases to the GC base
+ count will be:
+
+ nb*(nc + ng)/(nc + ng + nt)
+
+ The contribution of 'b' bases to the AT base count will be:
+
+ nb*nt/(nc + ng + nt)
+
+ All ambiguous bases contributions to the AT and GC counts are weighted
+ in a similar way and then the G+C content is computed as ngc/(nat + ngc).
+}
+\value{
+ \code{GC} returns the fraction of G+C (in [0,1]) as a numeric vector of length one.
+ \code{GCpos} returns GC at position \code{pos}.
+ \code{GC1}, \code{GC2}, \code{GC3} are wrappers for \code{GCpos} with the
+ argument \code{pos} set to 1, 2, and 3, respectively.
+ \code{NA} is returned when \code{seq} is \code{NA}.
+ \code{NA.GC} defaulting to \code{NA} is returned when the G+C content
+ can not be computed from data.
+}
+\references{
+ \code{citation("seqinr")}.
+
+ The program codonW used here for comparison is available at
+ \url{http://codonw.sourceforge.net/}.
+}
+\seealso{You can use \code{\link{s2c}} to convert a string into a vector of single
+characters and \code{\link{tolower}} to convert upper-case characters into
+lower-case characters. Do not confuse with \code{\link{gc}} for garbage collection.
+}
+\author{D. Charif, L. Palmeira, J.R. Lobry}
+\examples{
+ mysequence <- s2c("agtctggggggccccttttaagtagatagatagctagtcgta")
+ GC(mysequence) # 0.4761905
+ GC1(mysequence) # 0.6428571
+ GC2(mysequence) # 0.3571429
+ GC3(mysequence) # 0.4285714
+#
+# With upper-case characters:
+#
+ myUCsequence <- s2c("GGGGGGGGGA")
+ GC(myUCsequence) # 0.9
+#
+# With ambiguous bases:
+#
+ GC(s2c("acgt")) # 0.5
+ GC(s2c("acgtssss")) # 0.5
+ GC(s2c("acgtssss"), exact = TRUE) # 0.75
+#
+# Missing data:
+#
+ stopifnot(is.na(GC(s2c("NNNN"))))
+ stopifnot(is.na(GC(s2c("NNNN"), exact = TRUE)))
+ stopifnot(is.na(GC(s2c("WWSS"))))
+ stopifnot(GC(s2c("WWSS"), exact = TRUE) == 0.5)
+#
+# Coding sequences tests:
+#
+ cdstest <- s2c("ATGATG")
+ stopifnot(GC3(cdstest) == 1)
+ stopifnot(GC2(cdstest) == 0)
+ stopifnot(GC1(cdstest) == 0)
+#
+# How to reproduce the results obtained with the C program codonW
+# version 1.4.4 written by John Peden. We use here the "input.dat"
+# test file from codonW (there are no ambiguous bases in these
+# sequences).
+#
+ inputdatfile <- system.file("sequences/input.dat", package = "seqinr")
+ input <- read.fasta(file = inputdatfile) # read the FASTA file
+ inputoutfile <- system.file("sequences/input.out", package = "seqinr")
+ input.res <- read.table(inputoutfile, header = TRUE) # read codonW result file
+#
+# remove stop codon before computing G+C content (as in codonW)
+#
+ GC.codonW <- function(dnaseq, ...){
+ GC(dnaseq[seq_len(length(dnaseq) - 3)], ...)
+ }
+ input.gc <- sapply(input, GC.codonW, forceToLower = FALSE)
+ max(abs(input.gc - input.res$GC)) # 0.0004946237
+
+ plot(x = input.gc, y = input.res$GC, las = 1,
+ xlab = "Results with GC()", ylab = "Results from codonW",
+ main = "Comparison of G+C content results")
+ abline(c(0, 1), col = "red")
+ legend("topleft", inset = 0.01, legend = "y = x", lty = 1, col = "red")
+\dontrun{
+# Too long for routine check
+# This is a benchmark to compare the effect of various parameter
+# setting on computation time
+n <- 10
+from <-10^4
+to <- 10^5
+size <- seq(from = from, to = to, length = n)
+res <- data.frame(matrix(NA, nrow = n, ncol = 5))
+colnames(res) <- c("size", "FF", "FT", "TF", "TT")
+res[, "size"] <- size
+
+for(i in seq_len(n)){
+ myseq <- sample(x = s2c("acgtws"), size = size[i], replace = TRUE)
+ res[i, "FF"] <- system.time(GC(myseq, forceToLower = FALSE, exact = FALSE))[3]
+ res[i, "FT"] <- system.time(GC(myseq, forceToLower = FALSE, exact = TRUE))[3]
+ res[i, "TF"] <- system.time(GC(myseq, forceToLower = TRUE, exact = FALSE))[3]
+ res[i, "TT"] <- system.time(GC(myseq, forceToLower = TRUE, exact = TRUE))[3]
+}
+
+par(oma = c(0,0,2.5,0), mar = c(4,5,0,2) + 0.1, mfrow = c(2, 1))
+plot(res$size, res$TT, las = 1,
+xlab = "Sequence size [bp]",
+ylim = c(0, max(res$TT)), xlim = c(0, max(res$size)), ylab = "")
+title(ylab = "Observed time [s]", line = 4)
+abline(lm(res$TT~res$size))
+points(res$size, res$FT, col = "red")
+abline(lm(res$FT~res$size), col = "red", lty = 3)
+points(res$size, res$TF, pch = 2)
+abline(lm(res$TF~res$size))
+points(res$size, res$FF, pch = 2, col = "red")
+abline(lm(res$FF~res$size), lty = 3, col = "red")
+
+
+legend("topleft", inset = 0.01,
+ legend = c("forceToLower = TRUE", "forceToLower = FALSE"),
+ col = c("black", "red"), lty = c(1,3))
+legend("bottomright", inset = 0.01, legend = c("exact = TRUE", "exact = FALSE"),
+pch = c(1,2))
+
+mincpu <- lm(res$FF~res$size)$coef[2]
+
+barplot(
+c(lm(res$FF~res$size)$coef[2]/mincpu,
+ lm(res$TF~res$size)$coef[2]/mincpu,
+ lm(res$FT~res$size)$coef[2]/mincpu,
+ lm(res$TT~res$size)$coef[2]/mincpu),
+horiz = TRUE, xlab = "Increase of CPU time",
+col = c("red", "black", "red", "black"),
+names.arg = c("(F,F)", "(T,F)", "(F,T)", "(T,T)"), las = 1)
+title(ylab = "forceToLower,exact", line = 4)
+
+mtext("CPU time as function of options", outer = TRUE, line = 1, cex = 1.5)
+}
+}
+\keyword{manip}
diff --git a/man/JLO.Rd b/man/JLO.Rd
new file mode 100644
index 0000000..483b18b
--- /dev/null
+++ b/man/JLO.Rd
@@ -0,0 +1,49 @@
+\name{JLO}
+\alias{JLO}
+\docType{data}
+\title{Forensic Genetic Profile Raw Data}
+\description{
+ This is an example of raw data for a human STR genetic profile at 16 loci
+ (\emph{viz.} D8S1179, D21S11, D7S820, CSF1PO, D3S1358, TH01, D13S317, D16S539,
+ D2S1338, D19S433, vWA, TPOX, D18S51, Amelogenin, D5S818, FGA) which
+ are commonly used in forensic sciences for individual identifications.
+}
+\usage{data(JLO)}
+\format{
+ A list with 3 components.
+ \describe{
+ \item{Header}{a list corresponding to the header in the ABIF file}
+ \item{Directory}{a data.frame corresponding to the Directory in the ABIF file}
+ \item{Data}{a list with all raw data in the ABIF file.}
+ }
+}
+\details{
+ This dataset is the expected result when reading the file
+ \code{2_FAC321_0000205983_B02_004.fsa} with the
+ function \code{\link{read.abif}}. This dataset is used for
+ the quality check of this function.
+}
+\source{
+ The DNA source is from the author so that there is no privacy concern.
+ Data were kindly provided by the INPS (Institut National de Police Scientifique)
+ which is the national forensic sciences institute in France. Experiments
+ were done at the LPS (Laboratoire de Police Scientifique de Lyon) in 2008.
+}
+\references{
+ \code{citation("seqinr")}
+
+ Anonymous (2006) Applied Biosystem Genetic Analysis Data File Format.
+ Available at \url{http://www.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf}.
+ Last visited on 03-NOV-2008.
+}
+\author{J.R. Lobry}
+\seealso{
+ function \code{\link{read.abif}} to import files in ABIF format,
+ data \code{\link{gs500liz}} for internal size standards,
+ data \code{\link{ECH}} for the corresponding allelic ladder,
+ data \code{\link{identifiler}} for allele names in the allelic ladder.
+}
+\examples{
+data(JLO)
+}
+
diff --git a/man/SEQINR.UTIL.Rd b/man/SEQINR.UTIL.Rd
new file mode 100644
index 0000000..7e749b1
--- /dev/null
+++ b/man/SEQINR.UTIL.Rd
@@ -0,0 +1,27 @@
+\name{SEQINR.UTIL}
+\alias{SEQINR.UTIL}
+\docType{data}
+\title{utility data for seqinr}
+\description{
+ This data set gives the genetics code, the name of each codon, the IUPAC one-letter code for aminoacids and the physico-chemical class of amino acid and the pK values of amino acids described in Bjellqvist \emph{et al.} (1993).
+}
+\format{
+ \code{SEQINR.UTIL} is a list containing the 4 following objects:
+ \describe{
+ \item{CODES.NCBI}{is a data frame containing the genetics code : The standard ('Universal') genetic code with a selection of non-standard codes. }
+ \item{CODON.AA}{is a three columns data frame. The first column is a factor containing the codon. The second column is a factor giving the aminoacids names for each codon. The last column is a factor giving the IUPAC one-letter code for aminoacids}
+ \item{AA.PROPERTY}{is a list giving the physico-chemical class of amino acid. The different classes are the following: Tiny, Small, Aliphatic, Aromatic, Non.polar, Polar, Charged, Basic, Acidic }
+ \item{pK}{is a data frame. It gives the pK values of amino acids described in Bjellqvist \emph{et al.} (1993) , which were defined by examining polypeptide migration between pH 4.5 to 7.3 in an immobilised pH gradient gel environment with 9.2M and 9.8M urea at 15 degree or 25 degree}
+ }
+}
+\source{
+Data prepared by D. Charif.\cr The genetic codes have been taken from the ncbi database: \url{https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi}. Last visited on 2016-10-05 corresponding to last update of the Genetic Codes: April 30, 2013.\cr The IUPAC one-letter code for aminoacids is described at: \url{http://www.chem.qmul.ac.uk/iupac/AminoAcid/}.
+ pK values of amino acids were taken from Bjellqvist et al.\cr
+Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F., Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.(1993) The focusing positions of polypeptides in immobilized pH gradients can be predicted from their amino acid sequences.\emph{ Electrophoresis}, \bold{14}, 1023-1031.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\examples{
+data(SEQINR.UTIL)
+}
diff --git a/man/SeqAcnucWeb.Rd b/man/SeqAcnucWeb.Rd
new file mode 100644
index 0000000..34149c8
--- /dev/null
+++ b/man/SeqAcnucWeb.Rd
@@ -0,0 +1,38 @@
+\name{SeqAcnucWeb}
+\alias{SeqAcnucWeb}
+\alias{as.SeqAcnucWeb}
+\alias{is.SeqAcnucWeb}
+
+\title{Sequence coming from a remote ACNUC data base}
+\description{
+ \code{as.SeqAcnucWeb} is called by many functions, for instance by \code{query},
+ and should not be directly called by the user. It creates an object of class \code{SeqAcnucWeb}.
+ \code{is.SeqAcnucWeb} returns TRUE if the object is of class \code{SeqAcnucWeb}.
+}
+\usage{
+as.SeqAcnucWeb(object, length, frame, ncbigc)
+is.SeqAcnucWeb(object)
+}
+\arguments{
+ \item{object}{ a string giving the name of a sequence present in the data base}
+ \item{length}{ a string giving the length of the sequence present in the data base}
+ \item{frame}{ a string giving the frame of the sequence present in the data base}
+ \item{ncbigc}{ a string giving the ncbi genetic code of the sequence present in the data base}
+}
+\value{
+ \code{as.SeqAcnucWeb} returns an object sequence of class \code{SeqAcnucWeb}. Note
+ that as from seqinR 1.1-3 the slot socket has been deleted to save space for long lists.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\examples{
+ \dontrun{# Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "sp=felis catus et t=cds et o=mitochondrion")
+ stopifnot(is.SeqAcnucWeb(mylist$req[[1]]))
+ closebank()
+}
+}
+\keyword{utilities}
diff --git a/man/SeqFastaAA.Rd b/man/SeqFastaAA.Rd
new file mode 100644
index 0000000..2b20faa
--- /dev/null
+++ b/man/SeqFastaAA.Rd
@@ -0,0 +1,41 @@
+\name{SeqFastaAA}
+\alias{SeqFastaAA}
+\alias{is.SeqFastaAA}
+\alias{as.SeqFastaAA}
+\alias{summary.SeqFastaAA}
+\title{AA sequence in Fasta Format}
+\description{
+ \code{as.SeqFastaAA} is called by functions such as \code{read.fasta}. It creates an object of class \code{SeqFastaAA}.
+ \code{is.SeqFastaAA} returns TRUE if the object is of class \code{SeqFastaAA}.
+ \code{summary.SeqFastaAA} gives the AA composition of an object of class \code{SeqFastaAA}.
+}
+\usage{
+as.SeqFastaAA(object, name = NULL, Annot = NULL)
+is.SeqFastaAA(object)
+\method{summary}{SeqFastaAA}(object,...)
+}
+\arguments{
+ \item{object}{ a vector of chars representing a biological sequence }
+ \item{name}{ \code{NULL} a character string specifying a name for the sequence }
+ \item{Annot}{ \code{NULL} a character string specifying some annotations for the sequence }
+ \item{...}{ additional arguments affecting the summary produced }
+}
+\value{
+ \code{as.SeqFastaAA} returns an object sequence of class \code{SeqFastaAA}.
+ \code{summary.SeqFastaAA} returns a list with the following components:
+ \item{composition}{ the AA counting of the sequence}
+ \item{AA.Property}{ the percentage of each group of amino acid in the sequence. For example, the groups are small, tiny, aliphatic, aromatic ... }
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif}
+\examples{
+ s <- read.fasta(file = system.file("sequences/seqAA.fasta", package = "seqinr"), seqtype="AA")
+ is.SeqFastaAA(s[[1]])
+ summary(s[[1]])
+ myseq <- s2c("MSPTAYRRGSPAFLV*")
+ as.SeqFastaAA(myseq, name = "myseq", Annot = "blablabla")
+ myseq
+}
+\keyword{ utilities }
diff --git a/man/SeqFastadna.Rd b/man/SeqFastadna.Rd
new file mode 100644
index 0000000..0ab24d1
--- /dev/null
+++ b/man/SeqFastadna.Rd
@@ -0,0 +1,43 @@
+\name{SeqFastadna}
+\alias{SeqFastadna}
+\alias{is.SeqFastadna}
+\alias{as.SeqFastadna}
+\alias{summary.SeqFastadna}
+\title{ Class for DNA sequence in Fasta Format}
+\description{
+ \code{as.SeqFastadna} is called by many functions such as \code{read.fasta}. It creates an object of class \code{SeqFastadna}.
+ \code{is.SeqFastadna} returns TRUE if the object is of class \code{SeqFastadna}.
+ \code{summary.SeqFastadna} gives the base composition of an object of class \code{SeqFastadna}.
+}
+\usage{
+as.SeqFastadna(object, name = NULL, Annot = NULL)
+is.SeqFastadna(object)
+\method{summary}{SeqFastadna}(object, alphabet = s2c("acgt"), ...)
+}
+\arguments{
+ \item{object}{a vector of chars representing a biological sequence}
+ \item{name}{ \code{NULL} a character string specifying a name for the sequence }
+ \item{Annot}{ \code{NULL} a character string specifying some annotations for the sequence }
+ \item{...}{additional arguments affecting the summary produced}
+ \item{alphabet}{a vector of single characters}
+}
+\value{
+ \code{as.SeqFastadna} returns an object sequence of class \code{SeqFastadna}.
+ \code{summary.SeqFastadna} returns a list with the following components:
+ \item{length}{ the length of the sequence}
+ \item{compo}{the base counting of the sequence}
+ \item{GC}{the percentage of G+C in the sequence}
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif}
+\examples{
+ s <- read.fasta(system.file("sequences/malM.fasta",package="seqinr"))
+ is.SeqFastadna(s[[1]])
+ summary(s[[1]])
+ myseq <- s2c("acgttgatgctagctagcatcgat")
+ as.SeqFastadna(myseq, name = "myseq", Annot = "blablabla")
+ myseq
+}
+\keyword{utilities}
diff --git a/man/SeqFrag.Rd b/man/SeqFrag.Rd
new file mode 100644
index 0000000..31c30d6
--- /dev/null
+++ b/man/SeqFrag.Rd
@@ -0,0 +1,37 @@
+\name{SeqFrag}
+\alias{SeqFrag}
+\alias{is.SeqFrag}
+\alias{as.SeqFrag}
+\title{Class for sub-sequences}
+\description{
+ \code{as.SeqFrag} is called by all methods of \code{\link{getFrag}}, but not directly by the users. It creates an object sequence of class \code{SeqFrag}.
+}
+\usage{
+ as.SeqFrag(object, begin, end, name)
+ is.SeqFrag(object)
+}
+\arguments{
+ \item{object}{ an object sequence of class \code{SeqFastadna}, \code{SeqFastaAA}, \code{SeqAcnucWeb} or \code{SeqFrag} }
+ \item{begin}{ the first base of the fragment to get }
+ \item{end}{ the last base of the fragment to get }
+ \item{name}{ the name of the sequence }
+}
+\value{
+ \code{as.SeqFrag} returns a biological sequence with the following attributes:
+ \item{seqMother}{ the name of the sequence from which the sequence comes}
+ \item{begin}{ the position of the first base of the fragment on the mother sequence}
+ \item{end}{ the position of the last base of the fragment on the mother sequence }
+ \item{class}{ \code{SeqFrag} which is the class for sub-sequence }
+ \code{is.SeqFrag} returns TRUE if the object is of class \code{SeqFrag}.
+}
+\author{D. Charif, J.R. Lobry}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{\code{\link{getFrag}}, \code{\link{getLength}}, \code{\link{getName}},
+ \code{\link{getSequence}}, \code{\link{getTrans}} }
+\examples{
+ s <- read.fasta(file = system.file("sequences/malM.fasta", package = "seqinr"))
+ getFrag(s[[1]], 1, 10)
+}
+\keyword{utilities}
diff --git a/man/a.Rd b/man/a.Rd
new file mode 100644
index 0000000..5869ad0
--- /dev/null
+++ b/man/a.Rd
@@ -0,0 +1,55 @@
+\name{a}
+\alias{a}
+\title{Converts amino-acid three-letter code into the one-letter one}
+\description{
+This is a vectorized function to convert three-letters amino-acid code
+into the one-letter one, for instance "Ala" into "A".
+}
+\usage{
+a(aa)
+}
+\arguments{
+ \item{aa}{ A vector of string. All strings are 3 chars long.}
+}
+\details{
+Allowed character values for \code{aa} are given by \code{aaa()}.
+All other values will generate a warning and return NA.
+Called without arguments, \code{a()} returns the list of all possible
+output values.
+}
+\value{
+A vector of single characters.
+}
+\references{
+The IUPAC one-letter code for aminoacids is described at:
+\url{http://www.chem.qmul.ac.uk/iupac/AminoAcid/}\cr
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry }
+\seealso{\code{\link{aaa}}, \code{\link{translate}} }
+\examples{
+ #
+ # Show all possible input values:
+ #
+
+ aaa()
+
+ #
+ # Convert them in one letter-code:
+ #
+
+ a(aaa())
+
+ #
+ # Check consistency of results:
+ #
+
+ stopifnot( aaa(a(aaa())) == aaa())
+
+ #
+ # Show what happens with non-allowed values:
+ #
+
+ a("SOS") # should be NA and a warning is generated
+}
+\keyword{ utilities }
diff --git a/man/aaa.Rd b/man/aaa.Rd
new file mode 100644
index 0000000..7545f98
--- /dev/null
+++ b/man/aaa.Rd
@@ -0,0 +1,53 @@
+\name{aaa}
+\alias{aaa}
+\title{Converts amino-acid one-letter code into the three-letter one}
+\description{
+This is a vectorized function to convert one-letter amino-acid code
+into the three-letter one, for instance "A" into "Ala".}
+\usage{
+aaa(aa)
+}
+\arguments{
+ \item{aa}{ A vector of single characters.}
+}
+\details{
+Allowed character values for \code{aa} are given by \code{a()}.
+All other values will generate a warning and return NA.
+Called without arguments, \code{aaa()} returns the list of
+all possible output values.}
+\value{
+A vector of char string. All strings are 3 chars long.
+}
+\references{
+The IUPAC one-letter code for aminoacids is described at:
+\url{http://www.chem.qmul.ac.uk/iupac/AminoAcid/}
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{a}}, \code{\link{translate}} }
+\examples{
+ #
+ # Show all possible input values:
+ #
+
+ a()
+
+ #
+ # Convert them into three-letter code:
+ #
+
+ aaa(a())
+
+ #
+ # Check consistency of results:
+ #
+
+ stopifnot(a(aaa(a())) == a())
+
+ #
+ # Show what happens with non-allowed values:
+ #
+
+ aaa("Z") # should be NA and a warning is generated
+}
+\keyword{ utilities }
diff --git a/man/aacost.Rd b/man/aacost.Rd
new file mode 100644
index 0000000..f3ceafc
--- /dev/null
+++ b/man/aacost.Rd
@@ -0,0 +1,70 @@
+\name{aacost}
+\alias{aacost}
+\docType{data}
+\title{Aerobic cost of amino-acids in Escherichia coli and G+C classes}
+\description{
+The metabolic cost of amino-acid biosynthesis in \emph{E. coli} under aerobic conditions
+from table 1 in Akashi and Gojobori (2002). The G+C classes are from Lobry (1997).
+}
+\usage{data(aacost)}
+\format{
+ A data frame with 20 rows for the amino-acids and the following 7 columns:
+ \describe{
+ \item{aaa}{amino-acid (three-letters code).}
+ \item{a}{amino-acid (one-letter code).}
+ \item{prec}{precursor metabolites (see details).}
+ \item{p}{number of high-energy phosphate bonds contained in ATP and GTP molecules.}
+ \item{h}{number of available hydrogen atoms carried in NADH, NADPH,
+ and FADH2 molecules.}
+ \item{tot}{total metabolic cost assuming 2 high-energy phosphate bonds
+ per hydrogen atom.}
+ \item{gc}{an ordered factor (l<m<h) for the G+C class of the amino-acid (see details)}
+ }
+}
+\details{Precursor metabolites are: penP, ribose 5-phosphate; PRPP,
+5-phosphoribosyl pyrophosphate; eryP, erythrose 4-phosphate; 3pg,
+3-phosphoglycerate; pep, phospho\emph{enol}pyruvate; pyr, pyruvate;
+acCoA, acetyl-CoA; akg, alpha-ketoglutarate; oaa, oxaloacetate. Negative
+signs on precursor metabolites indicate chemicals \emph{gained} through
+biosynthetic pathways. Costs of precursors reflect averages for growth on
+glucose, acetate, and malate (see Table 6 in the supporting information
+from Akashi and Gojobori 2002).\cr
+
+The levels l<m<h for the \code{gc} ordered factor stand for Low G+C, Middle G+C,
+High G+C amino-acid, respectively. The frequencies of Low G+C amino-acids
+monotonically decrease with G+C content. The frequencies of High G+C
+amino-acids monotonically increase with G+C content. The frequencies of Middle G+C
+amino-acids first increase and then decrease with G+C content. These G+C classes
+are from Lobry (1997).\cr
+
+\code{example(aacost)} reproduces figure 2 from Lobry (2004).
+
+\if{html}{\figure{aka.pdf}{options: width=400}}
+\if{latex}{\figure{aka.pdf}{options: width=12cm}}
+
+}
+\source{
+Akashi, H, Gojobori, T. (2002) Metabolic efficiency and amino acid composition
+in the proteomes of \emph{Escherichia coli} and \emph{Bacillus subtilis}.
+\emph{Proceedings of the National Academy of Sciences of the United States of America},
+\bold{99}:3695-3700.\cr
+
+Lobry, J.R. (1997) Influence of genomic G+C content on average amino-acid composition
+of proteins from 59 bacterial species. \emph{Gene}, \bold{205}:309-316.\cr
+
+Lobry, J.R. (2004) Life history traits and genome structure: aerobiosis and G+C content in bacteria.
+\emph{Lecture Notes in Computer Science}, \bold{3039}:679-686.
+}
+\references{
+\code{citation("seqinr")}
+}
+\examples{
+data(aacost)
+levels(aacost$gc) <- c("low G+C", "mid G+C", "high G+C")
+stripchart(aacost$tot~aacost$gc, pch = 19, ylim = c(0.5,3.5),
+ xlim = c(0, max(aacost$tot)),
+ xlab = "Metabolic cost (high-energy phosphate bonds equivalent)",
+ main = "Metabolic cost of the 20 amino-acids\nas function of their G+C class" )
+boxplot(aacost$tot~aacost$gc, horizontal = TRUE, add = TRUE)
+}
+\keyword{datasets}
diff --git a/man/aaindex.Rd b/man/aaindex.Rd
new file mode 100644
index 0000000..1b9581b
--- /dev/null
+++ b/man/aaindex.Rd
@@ -0,0 +1,618 @@
+\name{aaindex}
+\alias{aaindex}
+\docType{data}
+\title{List of 544 physicochemical and biological properties for the 20 amino-acids}
+\description{Data were imported from release 9.1 (AUG 2006) of the aaindex1
+database. See the reference section to cite this database in a publication.
+}
+\usage{data(aaindex)}
+\format{
+ A named list with 544 elements, each having the following components:
+ \describe{
+ \item{H}{String: Accession number in the aaindex database.}
+ \item{D}{String: Data description.}
+ \item{R}{String: LITDB entry number.}
+ \item{A}{String: Author(s).}
+ \item{T}{String: Title of the article.}
+ \item{J}{String: Journal reference and comments.}
+ \item{C}{String: Accession numbers of similar entries with the correlation
+ coefficients of 0.8 (-0.8) or more (less). Notice: The correlation
+ coefficient is calculated with zeros filled for missing values.}
+ \item{I}{Numeric named vector: amino acid index data.}
+ }
+}
+\details{
+A short description of each entry is available under the D component:\cr
+
+alpha-CH chemical shifts (Andersen et al., 1992)\cr
+Hydrophobicity index (Argos et al., 1982)\cr
+Signal sequence helical potential (Argos et al., 1982)\cr
+Membrane-buried preference parameters (Argos et al., 1982)\cr
+Conformational parameter of inner helix (Beghin-Dirkx, 1975)\cr
+Conformational parameter of beta-structure (Beghin-Dirkx, 1975)\cr
+Conformational parameter of beta-turn (Beghin-Dirkx, 1975)\cr
+Average flexibility indices (Bhaskaran-Ponnuswamy, 1988)\cr
+Residue volume (Bigelow, 1967)\cr
+Information value for accessibility; average fraction 35\% (Biou et al., 1988)\cr
+Information value for accessibility; average fraction 23\% (Biou et al., 1988)\cr
+Retention coefficient in TFA (Browne et al., 1982)\cr
+Retention coefficient in HFBA (Browne et al., 1982)\cr
+Transfer free energy to surface (Bull-Breese, 1974)\cr
+Apparent partial specific volume (Bull-Breese, 1974)\cr
+alpha-NH chemical shifts (Bundi-Wuthrich, 1979)\cr
+alpha-CH chemical shifts (Bundi-Wuthrich, 1979)\cr
+Spin-spin coupling constants 3JHalpha-NH (Bundi-Wuthrich, 1979)\cr
+Normalized frequency of alpha-helix (Burgess et al., 1974)\cr
+Normalized frequency of extended structure (Burgess et al., 1974)\cr
+Steric parameter (Charton, 1981)\cr
+Polarizability parameter (Charton-Charton, 1982)\cr
+Free energy of solution in water, kcal/mole (Charton-Charton, 1982)\cr
+The Chou-Fasman parameter of the coil conformation (Charton-Charton, 1983)\cr
+A parameter defined from the residuals obtained from the best correlation of the Chou-Fasman parameter of beta-sheet (Charton-Charton, 1983)\cr
+The number of atoms in the side chain labelled 1+1 (Charton-Charton, 1983)\cr
+The number of atoms in the side chain labelled 2+1 (Charton-Charton, 1983)\cr
+The number of atoms in the side chain labelled 3+1 (Charton-Charton, 1983)\cr
+The number of bonds in the longest chain (Charton-Charton, 1983)\cr
+A parameter of charge transfer capability (Charton-Charton, 1983)\cr
+A parameter of charge transfer donor capability (Charton-Charton, 1983)\cr
+Average volume of buried residue (Chothia, 1975)\cr
+Residue accessible surface area in tripeptide (Chothia, 1976)\cr
+Residue accessible surface area in folded protein (Chothia, 1976)\cr
+Proportion of residues 95\% buried (Chothia, 1976)\cr
+Proportion of residues 100\% buried (Chothia, 1976)\cr
+Normalized frequency of beta-turn (Chou-Fasman, 1978a)\cr
+Normalized frequency of alpha-helix (Chou-Fasman, 1978b)\cr
+Normalized frequency of beta-sheet (Chou-Fasman, 1978b)\cr
+Normalized frequency of beta-turn (Chou-Fasman, 1978b)\cr
+Normalized frequency of N-terminal helix (Chou-Fasman, 1978b)\cr
+Normalized frequency of C-terminal helix (Chou-Fasman, 1978b)\cr
+Normalized frequency of N-terminal non helical region (Chou-Fasman, 1978b)\cr
+Normalized frequency of C-terminal non helical region (Chou-Fasman, 1978b)\cr
+Normalized frequency of N-terminal beta-sheet (Chou-Fasman, 1978b)\cr
+Normalized frequency of C-terminal beta-sheet (Chou-Fasman, 1978b)\cr
+Normalized frequency of N-terminal non beta region (Chou-Fasman, 1978b)\cr
+Normalized frequency of C-terminal non beta region (Chou-Fasman, 1978b)\cr
+Frequency of the 1st residue in turn (Chou-Fasman, 1978b)\cr
+Frequency of the 2nd residue in turn (Chou-Fasman, 1978b)\cr
+Frequency of the 3rd residue in turn (Chou-Fasman, 1978b)\cr
+Frequency of the 4th residue in turn (Chou-Fasman, 1978b)\cr
+Normalized frequency of the 2nd and 3rd residues in turn (Chou-Fasman, 1978b)\cr
+Normalized hydrophobicity scales for alpha-proteins (Cid et al., 1992)\cr
+Normalized hydrophobicity scales for beta-proteins (Cid et al., 1992)\cr
+Normalized hydrophobicity scales for alpha+beta-proteins (Cid et al., 1992)\cr
+Normalized hydrophobicity scales for alpha/beta-proteins (Cid et al., 1992)\cr
+Normalized average hydrophobicity scales (Cid et al., 1992)\cr
+Partial specific volume (Cohn-Edsall, 1943)\cr
+Normalized frequency of middle helix (Crawford et al., 1973)\cr
+Normalized frequency of beta-sheet (Crawford et al., 1973)\cr
+Normalized frequency of turn (Crawford et al., 1973)\cr
+Size (Dawson, 1972)\cr
+Amino acid composition (Dayhoff et al., 1978a)\cr
+Relative mutability (Dayhoff et al., 1978b)\cr
+Membrane preference for cytochrome b: MPH89 (Degli Esposti et al., 1990)\cr
+Average membrane preference: AMP07 (Degli Esposti et al., 1990)\cr
+Consensus normalized hydrophobicity scale (Eisenberg, 1984)\cr
+Solvation free energy (Eisenberg-McLachlan, 1986)\cr
+Atom-based hydrophobic moment (Eisenberg-McLachlan, 1986)\cr
+Direction of hydrophobic moment (Eisenberg-McLachlan, 1986)\cr
+Molecular weight (Fasman, 1976)\cr
+Melting point (Fasman, 1976)\cr
+Optical rotation (Fasman, 1976)\cr
+pK-N (Fasman, 1976)\cr
+pK-C (Fasman, 1976)\cr
+Hydrophobic parameter pi (Fauchere-Pliska, 1983)\cr
+Graph shape index (Fauchere et al., 1988)\cr
+Smoothed upsilon steric parameter (Fauchere et al., 1988)\cr
+Normalized van der Waals volume (Fauchere et al., 1988)\cr
+STERIMOL length of the side chain (Fauchere et al., 1988)\cr
+STERIMOL minimum width of the side chain (Fauchere et al., 1988)\cr
+STERIMOL maximum width of the side chain (Fauchere et al., 1988)\cr
+N.m.r. chemical shift of alpha-carbon (Fauchere et al., 1988)\cr
+Localized electrical effect (Fauchere et al., 1988)\cr
+Number of hydrogen bond donors (Fauchere et al., 1988)\cr
+Number of full nonbonding orbitals (Fauchere et al., 1988)\cr
+Positive charge (Fauchere et al., 1988)\cr
+Negative charge (Fauchere et al., 1988)\cr
+pK-a(RCOOH) (Fauchere et al., 1988)\cr
+Helix-coil equilibrium constant (Finkelstein-Ptitsyn, 1977)\cr
+Helix initiation parameter at posision i-1 (Finkelstein et al., 1991)\cr
+Helix initiation parameter at posision i,i+1,i+2 (Finkelstein et al., 1991)\cr
+Helix termination parameter at posision j-2,j-1,j (Finkelstein et al., 1991)\cr
+Helix termination parameter at posision j+1 (Finkelstein et al., 1991)\cr
+Partition coefficient (Garel et al., 1973)\cr
+Alpha-helix indices (Geisow-Roberts, 1980)\cr
+Alpha-helix indices for alpha-proteins (Geisow-Roberts, 1980)\cr
+Alpha-helix indices for beta-proteins (Geisow-Roberts, 1980)\cr
+Alpha-helix indices for alpha/beta-proteins (Geisow-Roberts, 1980)\cr
+Beta-strand indices (Geisow-Roberts, 1980)\cr
+Beta-strand indices for beta-proteins (Geisow-Roberts, 1980)\cr
+Beta-strand indices for alpha/beta-proteins (Geisow-Roberts, 1980)\cr
+Aperiodic indices (Geisow-Roberts, 1980)\cr
+Aperiodic indices for alpha-proteins (Geisow-Roberts, 1980)\cr
+Aperiodic indices for beta-proteins (Geisow-Roberts, 1980)\cr
+Aperiodic indices for alpha/beta-proteins (Geisow-Roberts, 1980)\cr
+Hydrophobicity factor (Goldsack-Chalifoux, 1973)\cr
+Residue volume (Goldsack-Chalifoux, 1973)\cr
+Composition (Grantham, 1974)\cr
+Polarity (Grantham, 1974)\cr
+Volume (Grantham, 1974)\cr
+Partition energy (Guy, 1985)\cr
+Hydration number (Hopfinger, 1971), Cited by Charton-Charton (1982)\cr
+Hydrophilicity value (Hopp-Woods, 1981)\cr
+Heat capacity (Hutchens, 1970)\cr
+Absolute entropy (Hutchens, 1970)\cr
+Entropy of formation (Hutchens, 1970)\cr
+Normalized relative frequency of alpha-helix (Isogai et al., 1980)\cr
+Normalized relative frequency of extended structure (Isogai et al., 1980)\cr
+Normalized relative frequency of bend (Isogai et al., 1980)\cr
+Normalized relative frequency of bend R (Isogai et al., 1980)\cr
+Normalized relative frequency of bend S (Isogai et al., 1980)\cr
+Normalized relative frequency of helix end (Isogai et al., 1980)\cr
+Normalized relative frequency of double bend (Isogai et al., 1980)\cr
+Normalized relative frequency of coil (Isogai et al., 1980)\cr
+Average accessible surface area (Janin et al., 1978)\cr
+Percentage of buried residues (Janin et al., 1978)\cr
+Percentage of exposed residues (Janin et al., 1978)\cr
+Ratio of buried and accessible molar fractions (Janin, 1979)\cr
+Transfer free energy (Janin, 1979)\cr
+Hydrophobicity (Jones, 1975)\cr
+pK (-COOH) (Jones, 1975)\cr
+Relative frequency of occurrence (Jones et al., 1992)\cr
+Relative mutability (Jones et al., 1992)\cr
+Amino acid distribution (Jukes et al., 1975)\cr
+Sequence frequency (Jungck, 1978)\cr
+Average relative probability of helix (Kanehisa-Tsong, 1980)\cr
+Average relative probability of beta-sheet (Kanehisa-Tsong, 1980)\cr
+Average relative probability of inner helix (Kanehisa-Tsong, 1980)\cr
+Average relative probability of inner beta-sheet (Kanehisa-Tsong, 1980)\cr
+Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)\cr
+Flexibility parameter for one rigid neighbor (Karplus-Schulz, 1985)\cr
+Flexibility parameter for two rigid neighbors (Karplus-Schulz, 1985)\cr
+The Kerr-constant increments (Khanarian-Moore, 1980)\cr
+Net charge (Klein et al., 1984)\cr
+Side chain interaction parameter (Krigbaum-Rubin, 1971)\cr
+Side chain interaction parameter (Krigbaum-Komoriya, 1979)\cr
+Fraction of site occupied by water (Krigbaum-Komoriya, 1979)\cr
+Side chain volume (Krigbaum-Komoriya, 1979)\cr
+Hydropathy index (Kyte-Doolittle, 1982)\cr
+Transfer free energy, CHP/water (Lawson et al., 1984)\cr
+Hydrophobic parameter (Levitt, 1976)\cr
+Distance between C-alpha and centroid of side chain (Levitt, 1976)\cr
+Side chain angle theta(AAR) (Levitt, 1976)\cr
+Side chain torsion angle phi(AAAR) (Levitt, 1976)\cr
+Radius of gyration of side chain (Levitt, 1976)\cr
+van der Waals parameter R0 (Levitt, 1976)\cr
+van der Waals parameter epsilon (Levitt, 1976)\cr
+Normalized frequency of alpha-helix, with weights (Levitt, 1978)\cr
+Normalized frequency of beta-sheet, with weights (Levitt, 1978)\cr
+Normalized frequency of reverse turn, with weights (Levitt, 1978)\cr
+Normalized frequency of alpha-helix, unweighted (Levitt, 1978)\cr
+Normalized frequency of beta-sheet, unweighted (Levitt, 1978)\cr
+Normalized frequency of reverse turn, unweighted (Levitt, 1978)\cr
+Frequency of occurrence in beta-bends (Lewis et al., 1971)\cr
+Conformational preference for all beta-strands (Lifson-Sander, 1979)\cr
+Conformational preference for parallel beta-strands (Lifson-Sander, 1979)\cr
+Conformational preference for antiparallel beta-strands (Lifson-Sander, 1979)\cr
+Average surrounding hydrophobicity (Manavalan-Ponnuswamy, 1978)\cr
+Normalized frequency of alpha-helix (Maxfield-Scheraga, 1976)\cr
+Normalized frequency of extended structure (Maxfield-Scheraga, 1976)\cr
+Normalized frequency of zeta R (Maxfield-Scheraga, 1976)\cr
+Normalized frequency of left-handed alpha-helix (Maxfield-Scheraga, 1976)\cr
+Normalized frequency of zeta L (Maxfield-Scheraga, 1976)\cr
+Normalized frequency of alpha region (Maxfield-Scheraga, 1976)\cr
+Refractivity (McMeekin et al., 1964), Cited by Jones (1975)\cr
+Retention coefficient in HPLC, pH7.4 (Meek, 1980)\cr
+Retention coefficient in HPLC, pH2.1 (Meek, 1980)\cr
+Retention coefficient in NaClO4 (Meek-Rossetti, 1981)\cr
+Retention coefficient in NaH2PO4 (Meek-Rossetti, 1981)\cr
+Average reduced distance for C-alpha (Meirovitch et al., 1980)\cr
+Average reduced distance for side chain (Meirovitch et al., 1980)\cr
+Average side chain orientation angle (Meirovitch et al., 1980)\cr
+Effective partition energy (Miyazawa-Jernigan, 1985)\cr
+Normalized frequency of alpha-helix (Nagano, 1973)\cr
+Normalized frequency of bata-structure (Nagano, 1973)\cr
+Normalized frequency of coil (Nagano, 1973)\cr
+AA composition of total proteins (Nakashima et al., 1990)\cr
+SD of AA composition of total proteins (Nakashima et al., 1990)\cr
+AA composition of mt-proteins (Nakashima et al., 1990)\cr
+Normalized composition of mt-proteins (Nakashima et al., 1990)\cr
+AA composition of mt-proteins from animal (Nakashima et al., 1990)\cr
+Normalized composition from animal (Nakashima et al., 1990)\cr
+AA composition of mt-proteins from fungi and plant (Nakashima et al., 1990)\cr
+Normalized composition from fungi and plant (Nakashima et al., 1990)\cr
+AA composition of membrane proteins (Nakashima et al., 1990)\cr
+Normalized composition of membrane proteins (Nakashima et al., 1990)\cr
+Transmembrane regions of non-mt-proteins (Nakashima et al., 1990)\cr
+Transmembrane regions of mt-proteins (Nakashima et al., 1990)\cr
+Ratio of average and computed composition (Nakashima et al., 1990)\cr
+AA composition of CYT of single-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of CYT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of EXT of single-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of EXT2 of single-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of MEM of single-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of CYT of multi-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of EXT of multi-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+AA composition of MEM of multi-spanning proteins (Nakashima-Nishikawa, 1992)\cr
+8 A contact number (Nishikawa-Ooi, 1980)\cr
+14 A contact number (Nishikawa-Ooi, 1986)\cr
+Transfer energy, organic solvent/water (Nozaki-Tanford, 1971)\cr
+Average non-bonded energy per atom (Oobatake-Ooi, 1977)\cr
+Short and medium range non-bonded energy per atom (Oobatake-Ooi, 1977)\cr
+Long range non-bonded energy per atom (Oobatake-Ooi, 1977)\cr
+Average non-bonded energy per residue (Oobatake-Ooi, 1977)\cr
+Short and medium range non-bonded energy per residue (Oobatake-Ooi, 1977)\cr
+Optimized beta-structure-coil equilibrium constant (Oobatake et al., 1985)\cr
+Optimized propensity to form reverse turn (Oobatake et al., 1985)\cr
+Optimized transfer energy parameter (Oobatake et al., 1985)\cr
+Optimized average non-bonded energy per atom (Oobatake et al., 1985)\cr
+Optimized side chain interaction parameter (Oobatake et al., 1985)\cr
+Normalized frequency of alpha-helix from LG (Palau et al., 1981)\cr
+Normalized frequency of alpha-helix from CF (Palau et al., 1981)\cr
+Normalized frequency of beta-sheet from LG (Palau et al., 1981)\cr
+Normalized frequency of beta-sheet from CF (Palau et al., 1981)\cr
+Normalized frequency of turn from LG (Palau et al., 1981)\cr
+Normalized frequency of turn from CF (Palau et al., 1981)\cr
+Normalized frequency of alpha-helix in all-alpha class (Palau et al., 1981)\cr
+Normalized frequency of alpha-helix in alpha+beta class (Palau et al., 1981)\cr
+Normalized frequency of alpha-helix in alpha/beta class (Palau et al., 1981)\cr
+Normalized frequency of beta-sheet in all-beta class (Palau et al., 1981)\cr
+Normalized frequency of beta-sheet in alpha+beta class (Palau et al., 1981)\cr
+Normalized frequency of beta-sheet in alpha/beta class (Palau et al., 1981)\cr
+Normalized frequency of turn in all-alpha class (Palau et al., 1981)\cr
+Normalized frequency of turn in all-beta class (Palau et al., 1981)\cr
+Normalized frequency of turn in alpha+beta class (Palau et al., 1981)\cr
+Normalized frequency of turn in alpha/beta class (Palau et al., 1981)\cr
+HPLC parameter (Parker et al., 1986)\cr
+Partition coefficient (Pliska et al., 1981)\cr
+Surrounding hydrophobicity in folded form (Ponnuswamy et al., 1980)\cr
+Average gain in surrounding hydrophobicity (Ponnuswamy et al., 1980)\cr
+Average gain ratio in surrounding hydrophobicity (Ponnuswamy et al., 1980)\cr
+Surrounding hydrophobicity in alpha-helix (Ponnuswamy et al., 1980)\cr
+Surrounding hydrophobicity in beta-sheet (Ponnuswamy et al., 1980)\cr
+Surrounding hydrophobicity in turn (Ponnuswamy et al., 1980)\cr
+Accessibility reduction ratio (Ponnuswamy et al., 1980)\cr
+Average number of surrounding residues (Ponnuswamy et al., 1980)\cr
+Intercept in regression analysis (Prabhakaran-Ponnuswamy, 1982)\cr
+Slope in regression analysis x 1.0E1 (Prabhakaran-Ponnuswamy, 1982)\cr
+Correlation coefficient in regression analysis (Prabhakaran-Ponnuswamy, 1982)\cr
+Hydrophobicity (Prabhakaran, 1990)\cr
+Relative frequency in alpha-helix (Prabhakaran, 1990)\cr
+Relative frequency in beta-sheet (Prabhakaran, 1990)\cr
+Relative frequency in reverse-turn (Prabhakaran, 1990)\cr
+Helix-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)\cr
+Beta-coil equilibrium constant (Ptitsyn-Finkelstein, 1983)\cr
+Weights for alpha-helix at the window position of -6 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of -5 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of -4 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of -3 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of -2 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of -1 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 0 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 1 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 2 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 3 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 4 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 5 (Qian-Sejnowski, 1988)\cr
+Weights for alpha-helix at the window position of 6 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -6 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -5 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -4 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -3 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -2 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of -1 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 0 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 1 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 2 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 3 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 4 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 5 (Qian-Sejnowski, 1988)\cr
+Weights for beta-sheet at the window position of 6 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -6 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -5 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -4 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -3 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -2 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of -1 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 0 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 1 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 2 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 3 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 4 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 5 (Qian-Sejnowski, 1988)\cr
+Weights for coil at the window position of 6 (Qian-Sejnowski, 1988)\cr
+Average reduced distance for C-alpha (Rackovsky-Scheraga, 1977)\cr
+Average reduced distance for side chain (Rackovsky-Scheraga, 1977)\cr
+Side chain orientational preference (Rackovsky-Scheraga, 1977)\cr
+Average relative fractional occurrence in A0(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in AR(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in AL(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in EL(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in E0(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in ER(i) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in A0(i-1) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in AR(i-1) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in AL(i-1) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in EL(i-1) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in E0(i-1) (Rackovsky-Scheraga, 1982)\cr
+Average relative fractional occurrence in ER(i-1) (Rackovsky-Scheraga, 1982)\cr
+Value of theta(i) (Rackovsky-Scheraga, 1982)\cr
+Value of theta(i-1) (Rackovsky-Scheraga, 1982)\cr
+Transfer free energy from chx to wat (Radzicka-Wolfenden, 1988)\cr
+Transfer free energy from oct to wat (Radzicka-Wolfenden, 1988)\cr
+Transfer free energy from vap to chx (Radzicka-Wolfenden, 1988)\cr
+Transfer free energy from chx to oct (Radzicka-Wolfenden, 1988)\cr
+Transfer free energy from vap to oct (Radzicka-Wolfenden, 1988)\cr
+Accessible surface area (Radzicka-Wolfenden, 1988)\cr
+Energy transfer from out to in(95\%buried) (Radzicka-Wolfenden, 1988)\cr
+Mean polarity (Radzicka-Wolfenden, 1988)\cr
+Relative preference value at N" (Richardson-Richardson, 1988)\cr
+Relative preference value at N' (Richardson-Richardson, 1988)\cr
+Relative preference value at N-cap (Richardson-Richardson, 1988)\cr
+Relative preference value at N1 (Richardson-Richardson, 1988)\cr
+Relative preference value at N2 (Richardson-Richardson, 1988)\cr
+Relative preference value at N3 (Richardson-Richardson, 1988)\cr
+Relative preference value at N4 (Richardson-Richardson, 1988)\cr
+Relative preference value at N5 (Richardson-Richardson, 1988)\cr
+Relative preference value at Mid (Richardson-Richardson, 1988)\cr
+Relative preference value at C5 (Richardson-Richardson, 1988)\cr
+Relative preference value at C4 (Richardson-Richardson, 1988)\cr
+Relative preference value at C3 (Richardson-Richardson, 1988)\cr
+Relative preference value at C2 (Richardson-Richardson, 1988)\cr
+Relative preference value at C1 (Richardson-Richardson, 1988)\cr
+Relative preference value at C-cap (Richardson-Richardson, 1988)\cr
+Relative preference value at C' (Richardson-Richardson, 1988)\cr
+Relative preference value at C" (Richardson-Richardson, 1988)\cr
+Information measure for alpha-helix (Robson-Suzuki, 1976)\cr
+Information measure for N-terminal helix (Robson-Suzuki, 1976)\cr
+Information measure for middle helix (Robson-Suzuki, 1976)\cr
+Information measure for C-terminal helix (Robson-Suzuki, 1976)\cr
+Information measure for extended (Robson-Suzuki, 1976)\cr
+Information measure for pleated-sheet (Robson-Suzuki, 1976)\cr
+Information measure for extended without H-bond (Robson-Suzuki, 1976)\cr
+Information measure for turn (Robson-Suzuki, 1976)\cr
+Information measure for N-terminal turn (Robson-Suzuki, 1976)\cr
+Information measure for middle turn (Robson-Suzuki, 1976)\cr
+Information measure for C-terminal turn (Robson-Suzuki, 1976)\cr
+Information measure for coil (Robson-Suzuki, 1976)\cr
+Information measure for loop (Robson-Suzuki, 1976)\cr
+Hydration free energy (Robson-Osguthorpe, 1979)\cr
+Mean area buried on transfer (Rose et al., 1985)\cr
+Mean fractional area loss (Rose et al., 1985)\cr
+Side chain hydropathy, uncorrected for solvation (Roseman, 1988)\cr
+Side chain hydropathy, corrected for solvation (Roseman, 1988)\cr
+Loss of Side chain hydropathy by helix formation (Roseman, 1988)\cr
+Transfer free energy (Simon, 1976), Cited by Charton-Charton (1982)\cr
+Principal component I (Sneath, 1966)\cr
+Principal component II (Sneath, 1966)\cr
+Principal component III (Sneath, 1966)\cr
+Principal component IV (Sneath, 1966)\cr
+Zimm-Bragg parameter s at 20 C (Sueki et al., 1984)\cr
+Zimm-Bragg parameter sigma x 1.0E4 (Sueki et al., 1984)\cr
+Optimal matching hydrophobicity (Sweet-Eisenberg, 1983)\cr
+Normalized frequency of alpha-helix (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of isolated helix (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of extended structure (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of chain reversal R (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of chain reversal S (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of chain reversal D (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of left-handed helix (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of zeta R (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of coil (Tanaka-Scheraga, 1977)\cr
+Normalized frequency of chain reversal (Tanaka-Scheraga, 1977)\cr
+Relative population of conformational state A (Vasquez et al., 1983)\cr
+Relative population of conformational state C (Vasquez et al., 1983)\cr
+Relative population of conformational state E (Vasquez et al., 1983)\cr
+Electron-ion interaction potential (Veljkovic et al., 1985)\cr
+Bitterness (Venanzi, 1984)\cr
+Transfer free energy to lipophilic phase (von Heijne-Blomberg, 1979)\cr
+Average interactions per side chain atom (Warme-Morgan, 1978)\cr
+RF value in high salt chromatography (Weber-Lacey, 1978)\cr
+Propensity to be buried inside (Wertz-Scheraga, 1978)\cr
+Free energy change of epsilon(i) to epsilon(ex) (Wertz-Scheraga, 1978)\cr
+Free energy change of alpha(Ri) to alpha(Rh) (Wertz-Scheraga, 1978)\cr
+Free energy change of epsilon(i) to alpha(Rh) (Wertz-Scheraga, 1978)\cr
+Polar requirement (Woese, 1973)\cr
+Hydration potential (Wolfenden et al., 1981)\cr
+Principal property value z1 (Wold et al., 1987)\cr
+Principal property value z2 (Wold et al., 1987)\cr
+Principal property value z3 (Wold et al., 1987)\cr
+Unfolding Gibbs energy in water, pH7.0 (Yutani et al., 1987)\cr
+Unfolding Gibbs energy in water, pH9.0 (Yutani et al., 1987)\cr
+Activation Gibbs energy of unfolding, pH7.0 (Yutani et al., 1987)\cr
+Activation Gibbs energy of unfolding, pH9.0 (Yutani et al., 1987)\cr
+Dependence of partition coefficient on ionic strength (Zaslavsky et al., 1982)\cr
+Hydrophobicity (Zimmerman et al., 1968)\cr
+Bulkiness (Zimmerman et al., 1968)\cr
+Polarity (Zimmerman et al., 1968)\cr
+Isoelectric point (Zimmerman et al., 1968)\cr
+RF rank (Zimmerman et al., 1968)\cr
+Normalized positional residue frequency at helix termini N4'(Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N"' (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N" (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N'(Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini Nc (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N1 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N2 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N3 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N4 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini N5 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C5 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C4 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C3 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C2 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C1 (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini Cc (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C' (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C" (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C"' (Aurora-Rose, 1998)\cr
+Normalized positional residue frequency at helix termini C4' (Aurora-Rose, 1998)\cr
+Delta G values for the peptides extrapolated to 0 M urea (O'Neil-DeGrado, 1990)\cr
+Helix formation parameters (delta delta G) (O'Neil-DeGrado, 1990)\cr
+Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)\cr
+Normalized flexibility parameters (B-values) for each residue surrounded by none rigid neighbours (Vihinen et al., 1994)\cr
+Normalized flexibility parameters (B-values) for each residue surrounded by one rigid neighbours (Vihinen et al., 1994)\cr
+Normalized flexibility parameters (B-values) for each residue surrounded by two rigid neighbours (Vihinen et al., 1994)\cr
+Free energy in alpha-helical conformation (Munoz-Serrano, 1994)\cr
+Free energy in alpha-helical region (Munoz-Serrano, 1994)\cr
+Free energy in beta-strand conformation (Munoz-Serrano, 1994)\cr
+Free energy in beta-strand region (Munoz-Serrano, 1994)\cr
+Free energy in beta-strand region (Munoz-Serrano, 1994)\cr
+Free energies of transfer of AcWl-X-LL peptides from bilayer interface to water (Wimley-White, 1996)\cr
+Thermodynamic beta sheet propensity (Kim-Berg, 1993)\cr
+Turn propensity scale for transmembrane helices (Monne et al., 1999)\cr
+Alpha helix propensity of position 44 in T4 lysozyme (Blaber et al., 1993)\cr
+p-Values of mesophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)\cr
+p-Values of thermophilic proteins based on the distributions of B values (Parthasarathy-Murthy, 2000)\cr
+Distribution of amino acid residues in the 18 non-redundant families of thermophilic proteins (Kumar et al., 2000)\cr
+Distribution of amino acid residues in the 18 non-redundant families of mesophilic proteins (Kumar et al., 2000)\cr
+Distribution of amino acid residues in the alpha-helices in thermophilic proteins (Kumar et al., 2000)\cr
+Distribution of amino acid residues in the alpha-helices in mesophilic proteins (Kumar et al., 2000)\cr
+Side-chain contribution to protein stability (kJ/mol) (Takano-Yutani, 2001)\cr
+Propensity of amino acids within pi-helices (Fodje-Al-Karadaghi, 2002)\cr
+Hydropathy scale based on self-information values in the two-state model (5% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (9% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (16% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (20% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (25% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (36% accessibility) (Naderi-Manesh et al., 2001)\cr
+Hydropathy scale based on self-information values in the two-state model (50% accessibility) (Naderi-Manesh et al., 2001)\cr
+Averaged turn propensities in a transmembrane helix (Monne et al., 1999)\cr
+Alpha-helix propensity derived from designed sequences (Koehl-Levitt, 1999)\cr
+Beta-sheet propensity derived from designed sequences (Koehl-Levitt, 1999)\cr
+Composition of amino acids in extracellular proteins (percent) (Cedano et al., 1997)\cr
+Composition of amino acids in anchored proteins (percent) (Cedano et al., 1997)\cr
+Composition of amino acids in membrane proteins (percent) (Cedano et al., 1997)\cr
+Composition of amino acids in intracellular proteins (percent) (Cedano et al., 1997)\cr
+Composition of amino acids in nuclear proteins (percent) (Cedano et al., 1997)\cr
+Surface composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Surface composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Surface composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Surface composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)\cr
+Interior composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Interior composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Interior composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Interior composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)\cr
+Entire chain composition of amino acids in intracellular proteins of thermophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Entire chain composition of amino acids in intracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Entire chain composition of amino acids in extracellular proteins of mesophiles (percent) (Fukuchi-Nishikawa, 2001)\cr
+Entire chain composition of amino acids in nuclear proteins (percent) (Fukuchi-Nishikawa, 2001)\cr
+Screening coefficients gamma, local (Avbelj, 2000)\cr
+Screening coefficients gamma, non-local (Avbelj, 2000)\cr
+Slopes tripeptide, FDPB VFF neutral (Avbelj, 2000)\cr
+Slopes tripeptides, LD VFF neutral (Avbelj, 2000)\cr
+Slopes tripeptide, FDPB VFF noside (Avbelj, 2000)\cr
+Slopes tripeptide FDPB VFF all (Avbelj, 2000)\cr
+Slopes tripeptide FDPB PARSE neutral (Avbelj, 2000)\cr
+Slopes dekapeptide, FDPB VFF neutral (Avbelj, 2000)\cr
+Slopes proteins, FDPB VFF neutral (Avbelj, 2000)\cr
+Side-chain conformation by gaussian evolutionary method (Yang et al., 2002)\cr
+Amphiphilicity index (Mitaku et al., 2002)\cr
+Volumes including the crystallographic waters using the ProtOr (Tsai et al., 1999)\cr
+Volumes not including the crystallographic waters using the ProtOr (Tsai et al., 1999)\cr
+Electron-ion interaction potential values (Cosic, 1994)\cr
+Hydrophobicity scales (Ponnuswamy, 1993)\cr
+Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)\cr
+Hydrophobicity coefficient in RP-HPLC, C8 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)\cr
+Hydrophobicity coefficient in RP-HPLC, C4 with 0.1%TFA/MeCN/H2O (Wilce et al. 1995)\cr
+Hydrophobicity coefficient in RP-HPLC, C18 with 0.1%TFA/2-PrOH/MeCN/H2O (Wilce et al. 1995)\cr
+Hydrophilicity scale (Kuhn et al., 1995)\cr
+Retention coefficient at pH 2 (Guo et al., 1986)\cr
+Modified Kyte-Doolittle hydrophobicity scale (Juretic et al., 1998)\cr
+Interactivity scale obtained from the contact matrix (Bastolla et al., 2005)\cr
+Interactivity scale obtained by maximizing the mean of correlation coefficient over single-domain globular proteins (Bastolla et al., 2005)\cr
+Interactivity scale obtained by maximizing the mean of correlation coefficient over pairs of sequences sharing the TIM barrel fold (Bastolla et al., 2005)\cr
+Linker propensity index (Suyama-Ohara, 2003)\cr
+Knowledge-based membrane-propensity scale from 1D\_Helix in MPtopo databases (Punta-Maritan, 2003)\cr
+Knowledge-based membrane-propensity scale from 3D\_Helix in MPtopo databases (Punta-Maritan, 2003)\cr
+Linker propensity from all dataset (George-Heringa, 2003)\cr
+Linker propensity from 1-linker dataset (George-Heringa, 2003)\cr
+Linker propensity from 2-linker dataset (George-Heringa, 2003)\cr
+Linker propensity from 3-linker dataset (George-Heringa, 2003)\cr
+Linker propensity from small dataset (linker length is less than six residues) (George-Heringa, 2003)\cr
+Linker propensity from medium dataset (linker length is between six and 14 residues) (George-Heringa, 2003)\cr
+Linker propensity from long dataset (linker length is greater than 14 residues) (George-Heringa, 2003)\cr
+Linker propensity from helical (annotated by DSSP) dataset (George-Heringa, 2003)\cr
+Linker propensity from non-helical (annotated by DSSP) dataset (George-Heringa, 2003)\cr
+The stability scale from the knowledge-based atom-atom potential (Zhou-Zhou, 2004)\cr
+The relative stability scale extracted from mutation experiments (Zhou-Zhou, 2004)\cr
+Buriability (Zhou-Zhou, 2004)\cr
+Linker index (Bae et al., 2005)\cr
+Mean volumes of residues buried in protein interiors (Harpaz et al., 1994)\cr
+Average volumes of residues (Pontius et al., 1996)\cr
+Hydrostatic pressure asymmetry index, PAI (Di Giulio, 2005)\cr
+Hydrophobicity index (Wolfenden et al., 1979)\cr
+Average internal preferences (Olsen, 1980)\cr
+Hydrophobicity-related index (Kidera et al., 1985)\cr
+Apparent partition energies calculated from Wertz-Scheraga index (Guy, 1985)\cr
+Apparent partition energies calculated from Robson-Osguthorpe index (Guy, 1985)\cr
+Apparent partition energies calculated from Janin index (Guy, 1985)\cr
+Apparent partition energies calculated from Chothia index (Guy, 1985)\cr
+Hydropathies of amino acid side chains, neutral form (Roseman, 1988)\cr
+Hydropathies of amino acid side chains, pi-values in pH 7.0 (Roseman, 1988)\cr
+Weights from the IFH scale (Jacobs-White, 1989)\cr
+Hydrophobicity index, 3.0 pH (Cowan-Whittaker, 1990)\cr
+Scaled side chain hydrophobicity values (Black-Mould, 1991)\cr
+Hydrophobicity scale from native protein structures (Casari-Sippl, 1992)\cr
+NNEIG index (Cornette et al., 1987)\cr
+SWEIG index (Cornette et al., 1987)\cr
+PRIFT index (Cornette et al., 1987)\cr
+PRILS index (Cornette et al., 1987)\cr
+ALTFT index (Cornette et al., 1987)\cr
+ALTLS index (Cornette et al., 1987)\cr
+TOTFT index (Cornette et al., 1987)\cr
+TOTLS index (Cornette et al., 1987)\cr
+Relative partition energies derived by the Bethe approximation (Miyazawa-Jernigan, 1999)\cr
+Optimized relative partition energies - method A (Miyazawa-Jernigan, 1999)\cr
+Optimized relative partition energies - method B (Miyazawa-Jernigan, 1999)\cr
+Optimized relative partition energies - method C (Miyazawa-Jernigan, 1999)\cr
+Optimized relative partition energies - method D (Miyazawa-Jernigan, 1999)\cr
+Hydrophobicity index (Engelman et al., 1986)\cr
+Hydrophobicity index (Fasman, 1989)
+}
+\source{
+\url{http://www.genome.jp/aaindex}
+}
+\references{
+
+From the original aaindex documentation:\cr
+
+Please cite the following references when making use of the database:
+
+Kawashima, S. and Kanehisa, M. (2000) AAindex: amino acid index
+database. \emph{Nucleic Acids Res.}, \bold{28}:374.\cr
+
+Tomii, K. and Kanehisa, M. (1996) Analysis of amino acid indices and
+mutation matrices for sequence comparison and structure
+prediction of proteins. \emph{Protein Eng.}, \bold{9}:27-36.\cr
+
+Nakai, K., Kidera, A., and Kanehisa, M. (1988) Cluster analysis of
+amino acid indices for prediction of protein structure and
+function. \emph{Protein Eng.} \bold{2}:93-100.\cr
+
+}
+\examples{
+#
+# Load data:
+#
+
+data(aaindex)
+
+#
+# Suppose that we need the Kyte & Doolittle Hydropathy index. We first look
+# at the entries with Kyte as author:
+#
+
+which(sapply(aaindex, function(x) length(grep("Kyte", x$A)) != 0))
+
+#
+# This should return that entry number 151 named KYTJ820101 is the only
+# one that fits our request. We can access it by position or by name,
+# for instance:
+#
+
+aaindex[[151]]$I
+aaindex[["KYTJ820101"]]$I
+aaindex$KYTJ820101$I
+
+}
+\keyword{datasets}
diff --git a/man/acnucopen.Rd b/man/acnucopen.Rd
new file mode 100644
index 0000000..6213655
--- /dev/null
+++ b/man/acnucopen.Rd
@@ -0,0 +1,79 @@
+\name{acnucopen}
+\alias{acnucopen}
+\alias{acnucclose}
+\alias{clientid}
+\alias{quitacnuc}
+\title{open and close a remote access to an ACNUC database}
+\description{
+These are low level functions to start and stop a remote access to an ACNUC database.
+}
+
+\usage{
+acnucopen(db, socket, challenge = NA)
+acnucclose(socket)
+clientid(id = paste("seqinr_",
+ packageDescription("seqinr")$Version, sep = ""),
+ socket, verbose = FALSE)
+quitacnuc(socket)
+}
+
+\arguments{
+ \item{db}{the remote ACNUC database name}
+ \item{socket}{an object of class \code{sockconn} connecting to an ACNUC server}
+ \item{challenge}{not yet implemented}
+ \item{id}{client ID definition defaulting to seqinr + package version number}
+ \item{verbose}{logical, if TRUE verbose mode is on}
+}
+
+\details{
+These low level functions are usually not used directly by the user.
+Use \code{\link{choosebank}} to open a remote ACNUC database
+and \code{\link{closebank}} to close it.
+}
+
+\value{
+For \code{acnucopen} a list with the following
+components: type : the type of database that was opened.
+totseqs, totspec, totkey : total number of seqs, species, keywords in opened database.
+ACC\_LENGTH, L\_MNEMO, WIDTH\_KW, WIDTH\_SP, WIDTH\_SMJ, WIDTH\_AUT,
+WIDTH\_BIB, lrtxt, SUBINLNG: max lengths of record keys in database.
+
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{choosebank}}, \code{\link{closebank}}
+}
+
+\examples{
+ \dontrun{# Need internet connection
+ mysocket <- socketConnection( host = "pbil.univ-lyon1.fr",
+ port = 5558, server = FALSE, blocking = TRUE)
+ readLines(mysocket, n = 1) # OK acnuc socket started
+ acnucopen("emblTP", socket = mysocket) -> res
+ expected <- c("EMBL", "14138095", "236401", "1186228", "8",
+ "16", "40", "40", "20", "20", "40", "60", "504")
+ stopifnot(all(unlist(res) == expected))
+ tryalreadyopen <- try(acnucopen("emblTP", socket = mysocket))
+ stopifnot(inherits(tryalreadyopen, "try-error"))
+ # Need a fresh socket because acnucopen() closes it on error:
+ mysocket <- socketConnection( host = "pbil.univ-lyon1.fr",
+ port = 5558, server = FALSE, blocking = TRUE)
+ tryoff <- try(acnucopen("off", socket = mysocket))
+ stopifnot(inherits(tryoff, "try-error"))
+
+ mysocket <- socketConnection( host = "pbil.univ-lyon1.fr",
+ port = 5558, server = FALSE, blocking = TRUE)
+ tryinexistent <- try(acnucopen("tagadatagadatsointsoin", socket = mysocket))
+ stopifnot(inherits(tryinexistent, "try-error"))
+
+ mysocket <- socketConnection( host = "pbil.univ-lyon1.fr",
+ port = 5558, server = FALSE, blocking = TRUE)
+ trycloseunopened <- try(acnucclose(mysocket))
+ stopifnot(inherits(trycloseunopened, "try-error"))
+
+ }
+}
+\keyword{utilities}
diff --git a/man/al2bp.Rd b/man/al2bp.Rd
new file mode 100644
index 0000000..c434e54
--- /dev/null
+++ b/man/al2bp.Rd
@@ -0,0 +1,69 @@
+\name{al2bp}
+\alias{al2bp}
+\title{To Convert a forensic microsatellite allele name into its length in base pairs}
+\description{
+Conventions used to name forensic microsatellite alleles (STR) are described
+in Bar \emph{et al.} (1994). The name "9.3" means for instance that there are
+9 repetitions of the complete base oligomer and an incomplete repeat with 3 bp.
+}
+\usage{
+al2bp(allele.name, repeat.bp = 4, offLadderChars = "><", split = "\\\\.")
+}
+\arguments{
+ \item{allele.name}{The name of the allele, coerced to a string type.}
+ \item{repeat.bp}{The length in bp of the microsatellite base repeat, most of them
+are tetranucleotides so that it defaults to 4. Do not forget to change this to 5
+for loci based on pentanucleotides such as Penta D or Penta E.}
+ \item{offLadderChars}{\code{\link{NA}} is returned when at least one of
+ these characters is found in the allele name. Off ladder alleles are typically
+ reported as "<8" or ">19" }
+ \item{split}{The convention is to use a dot, as in "9.3", between the number of repeats
+and the number of bases in the incomplete repeat. On some locales where the
+decimal separator is a comma this could be a source of problem, try to use
+"," instead for this argument which is forwarded to \code{\link{strsplit}}.}
+}
+\value{
+A single numeric value corresponding to the size in bp of the allele, or NA
+when characters denoting off ladder alleles are encountered or when numeric
+conversion is impossible (\emph{e.g.} with "X" or "Y" allele names at Amelogenin locus).
+}
+\details{
+Warnings generated by faulty numeric conversions are suppressed here.
+}
+\references{
+Bar, W. and Brinkmann, B. and Lincoln, P. and Mayr, W.R. and Rossi, U. (1994)
+DNA recommendations. 1994 report concerning further recommendations
+of the DNA Commission of the ISFH regarding PCR-based polymorphisms
+in STR (short tandem repeat) systems.
+\emph{Int. J. Leg. Med.}, \bold{107}:159-160.
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{identifiler}} for forensic microsatellite allele name examples.}
+\examples{
+#
+# Quality check and examples:
+#
+stopifnot( al2bp("9") == 36 ) # 9 repeats of a tetranucleotide is 36 bp
+stopifnot( al2bp(9) == 36 ) # also OK with numerical argument
+stopifnot( al2bp(9, 5) == 45 ) # 9 repeats of a pentanucleotide is 45 bp
+stopifnot( al2bp("9.3") == 39 ) # microvariant case
+stopifnot( is.na(al2bp("<8")) ) # off ladder case
+stopifnot( is.na(al2bp(">19")) ) # off ladder case
+stopifnot( is.na(al2bp("X")) ) # non STR case
+#
+# Application to the alleles names in the identifiler data set where all loci are
+# tetranucleotide repeats:
+#
+data(identifiler)
+al.names <- unlist(identifiler)
+al.length <- sapply(al.names, al2bp)
+loc.names <- unlist(lapply(identifiler, names))
+loc.nall <-unlist(lapply(identifiler, function(x) lapply(x,length)))
+loc.fac <- factor(rep(loc.names, loc.nall))
+par(lend = "butt", mar = c(5,6,4,1)+0.1)
+boxplot(al.length~loc.fac, las = 1, col = "lightblue",
+ horizontal = TRUE, main = "Range of allele lengths at forensic loci",
+ xlab = "Length (bp)", ylim = c(0, max(al.length, na.rm = TRUE)))
+}
diff --git a/man/alllistranks.Rd b/man/alllistranks.Rd
new file mode 100644
index 0000000..fb5e2b0
--- /dev/null
+++ b/man/alllistranks.Rd
@@ -0,0 +1,55 @@
+\name{alllistranks}
+\alias{alllistranks}
+\alias{alr}
+\title{To get the count of existing lists and all their ranks on server}
+\description{
+This is a low level function to get the total number of lists and all their
+ranks in an opened database.
+}
+
+\usage{
+alllistranks(socket = autosocket(), verbose = FALSE)
+alr(socket = autosocket(), verbose = FALSE)
+}
+
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{verbose}{if \code{TRUE}, verbose mode is on}
+}
+
+\details{
+ This low level function is usually not used directly by the user.
+}
+
+\value{
+ A list with two components:
+ \item{count}{count of existing lists}
+ \item{ranks}{their ranks}
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{choosebank}},
+ \code{\link{query}}
+}
+
+\examples{
+ \dontrun{# Need internet connection
+ choosebank("emblTP")
+ tmp1 <- query("tmp1", "sp=Borrelia burgdorferi", virtual = TRUE)
+ tmp2 <- query("tmp2", "sp=Borrelia burgdorferi", virtual = TRUE)
+ tmp3 <- query("tmp3", "sp=Borrelia burgdorferi", virtual = TRUE)
+ (result <- alllistranks())
+ stopifnot(result$count == 3) # Three ACNUC lists
+ stopifnot(result$ranks == 2:4) # Starting at rank 2
+ #
+ # Summary of current lists defined on the ACNUC server:
+ #
+ sapply(result$ranks, getliststate)
+ closebank()
+ }
+}
+\keyword{utilities}
diff --git a/man/amb.Rd b/man/amb.Rd
new file mode 100644
index 0000000..3d6bfc9
--- /dev/null
+++ b/man/amb.Rd
@@ -0,0 +1,53 @@
+\name{amb}
+\alias{amb}
+\title{Expansion of IUPAC nucleotide symbols}
+\description{
+This function returns the list of nucleotides matching a given IUPAC
+nucleotide symbol, for instance \code{c("c", "g")} for \code{"s"}.
+}
+\usage{
+amb(base, forceToLower = TRUE, checkBase = TRUE,
+IUPAC = s2c("acgturymkswbdhvn"), u2t = TRUE)
+}
+\arguments{
+ \item{base}{an IUPAC symbol for a nucleotide as a single character}
+ \item{forceToLower}{if TRUE the base is forced to lower case}
+ \item{checkBase}{if TRUE the character is checked to belong to the allowed IUPAC symbol list}
+ \item{IUPAC}{the list of allowed IUPAC symbols}
+ \item{u2t}{if TRUE "u" for uracil in RNA are changed into "t" for thymine in DNA}
+}
+\details{
+Non ambiguous bases are returned unchanged (except for "u" when u2t is TRUE).
+}
+\value{
+When base is missing, the list of IUPAC symbols is returned, otherwise
+a vector with expanded symbols.
+}
+\references{
+
+The nomenclature for incompletely specified bases in nucleic acid sequences
+at: \url{http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+
+\seealso{See \code{\link{bma}} for the reverse operation.
+Use \code{\link{tolower}} to change upper case letters into
+lower case letters.}
+
+\examples{
+#
+# The list of IUPAC symbols:
+#
+
+amb()
+
+#
+# And their expansion:
+#
+
+sapply(amb(), amb)
+
+}
+\keyword{utilities}
diff --git a/man/as.alignment.Rd b/man/as.alignment.Rd
new file mode 100644
index 0000000..637c69a
--- /dev/null
+++ b/man/as.alignment.Rd
@@ -0,0 +1,38 @@
+\name{as.alignment}
+\alias{as.alignment}
+\title{Constructor for class alignment}
+\description{
+Returns an object of (S3) class alignment.
+}
+\usage{
+as.alignment(nb = NULL, nam = NULL, seq = NULL, com = NULL)
+}
+\arguments{
+ \item{nb}{integer. The number of sequences in the alignment.}
+ \item{nam}{vector of \code{nb} character strings. The sequence names. }
+ \item{seq}{vector of \code{nb} character strings. The aligned sequences.}
+ \item{com}{vector of \code{nb} character strings. The comments about sequences.}
+}
+
+\value{
+ An object of class \code{alignment} which is a list with the following components:
+ \item{nb}{ the number of aligned sequences }
+ \item{nam}{ a vector of strings containing the names of the aligned sequences }
+ \item{seq}{ a vector of strings containing the aligned sequences}
+ \item{com}{ a vector of strings containing the commentaries for each sequence or \code{NA} if there are no comments }
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{
+\code{\link{read.alignment}},
+\code{\link{as.matrix.alignment}}, \code{\link{read.fasta}},
+\code{\link{write.fasta}}, \code{\link{reverse.align}}, \code{\link{dist.alignment}}.
+
+ }
+\examples{
+as.alignment(nb = 2, nam = c("one", "two"),
+ seq = c("-ACGT", "GACG-"), com = c("un", "deux"))
+}
+
diff --git a/man/as.matrix.alignment.Rd b/man/as.matrix.alignment.Rd
new file mode 100644
index 0000000..8d11ce4
--- /dev/null
+++ b/man/as.matrix.alignment.Rd
@@ -0,0 +1,26 @@
+\name{as.matrix.alignment}
+\alias{as.matrix.alignment}
+
+\title{as.matrix.alignment}
+\description{
+Converts an alignment into a matrix of characters
+}
+\usage{
+\method{as.matrix}{alignment}(x, ...)
+}
+\arguments{
+ \item{x}{ an object of the class alignment.}
+ \item{...}{additional arguments to be passed to or from methods.}
+}
+\value{
+ A matrix of characters.
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{read.alignment}}
+}
+\examples{
+ phylip <- read.alignment(file = system.file("sequences/test.phylip",
+ package = "seqinr"), format = "phylip")
+ as.matrix(phylip)
+}
diff --git a/man/autosocket.Rd b/man/autosocket.Rd
new file mode 100644
index 0000000..4dca9fc
--- /dev/null
+++ b/man/autosocket.Rd
@@ -0,0 +1,28 @@
+\name{autosocket}
+\alias{autosocket}
+\title{Returns a socket to the last opened database}
+\description{
+This is a low level function that is mainly used to select automatically
+the last opened ACNUC database for functions using sockets.
+}
+\usage{
+autosocket()
+}
+
+\value{
+An object of class sockconn.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{connections}}.}
+\examples{
+\dontrun{ #Need internet connection
+ choosebank("emblTP")
+ autosocket()
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/baselineabif.Rd b/man/baselineabif.Rd
new file mode 100644
index 0000000..89ef08c
--- /dev/null
+++ b/man/baselineabif.Rd
@@ -0,0 +1,36 @@
+\name{baselineabif}
+\Rdversion{1.1}
+\alias{baselineabif}
+\title{Estimation of baseline value}
+\description{
+This function tries to estimate the baseline value for RFU data from
+capillary electrophoresis whith the heuristic that the most common
+value is the baseline.
+}
+\usage{
+baselineabif(rfu, maxrfu = 1000)
+}
+\arguments{
+ \item{rfu}{a numeric vector of signal value}
+ \item{maxrfu}{signal values greater than or equal to maxrfu are forced to NA}
+}
+\value{A single numeric value for the estimated baseline.}
+
+\author{J.R. Lobry}
+
+\seealso{
+\code{\link{JLO}} for a dataset example, \code{\link{plotabif}} to plot this kind of data,
+\code{\link{peakabif}} to estimate peak parameters.
+}
+
+\examples{
+data(JLO)
+rfu <- JLO$Data$DATA.1
+bl <- baselineabif(rfu)
+plot(1:length(rfu), rfu, type = "l",
+ xlab = "Time [datapoint units]",
+ ylab = "Signal [RFU]",
+ main = "Example of baseline estimates")
+abline(h = bl, col="red", lty = 2)
+legend("topright", inset = 0.02, "Baseline estimate", lty = 2, col = "red")
+}
diff --git a/man/bma.Rd b/man/bma.Rd
new file mode 100644
index 0000000..0ef9dde
--- /dev/null
+++ b/man/bma.Rd
@@ -0,0 +1,42 @@
+\name{bma}
+\alias{bma}
+\title{Computing an IUPAC nucleotide symbol}
+\description{
+ This function returns the IUPAC symbol for a nucleotide sequence, for instance
+ \code{c("c", "c", "g")} is coded by \code{"s"}.
+}
+\usage{
+bma(nucl, warn.non.IUPAC = TRUE, type = c("DNA", "RNA"))
+}
+\arguments{
+ \item{nucl}{a nucleotide sequence as a vector of single chars}
+ \item{warn.non.IUPAC}{if TRUE warns when no IUPAC symbol is possible}
+ \item{type}{whether this is a DNA or a RNA sequence}
+}
+\details{
+ The sequence is forced in lower case letters and ambiguous bases
+ are expanded before trying to find an IUPAC symbol.
+}
+\value{
+ A single IUPAC symbol in lower case, or NA when this is not possible.
+}
+\references{
+
+The nomenclature for incompletely specified bases in nucleic acid sequences
+at: \url{http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+
+\seealso{See \code{\link{amb}} for the reverse operation.
+Use \code{\link{toupper}} to change lower case letters into
+upper case letters.}
+
+\examples{
+
+stopifnot(bma(s2c("atatattttata")) == "w")
+stopifnot(bma(s2c("gcggcgcgcggc")) == "s")
+stopifnot(bma(s2c("ACGT")) == "n")
+stopifnot(is.na(bma(s2c("atatttt---tatat")))) # a warning is issued
+}
diff --git a/man/c2s.Rd b/man/c2s.Rd
new file mode 100755
index 0000000..344af75
--- /dev/null
+++ b/man/c2s.Rd
@@ -0,0 +1,26 @@
+\name{c2s}
+\alias{c2s}
+\title{ conversion of a vector of chars into a string }
+\description{
+ This is a simple utility function to convert a vector of chars
+such as c("m", "e", "r", "g", "e", "d") into a single string such
+as "merged".
+}
+\usage{
+c2s(chars = c("m", "e", "r", "g", "e", "d"))
+}
+\arguments{
+ \item{chars}{ a vector of chars }
+}
+\value{
+ a string
+}
+\author{J.R. Lobry}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{s2c}} }
+\examples{
+c2s( c("m","e","r","g","e","d") )
+}
+\keyword{utilities}
diff --git a/man/cai.Rd b/man/cai.Rd
new file mode 100644
index 0000000..4147630
--- /dev/null
+++ b/man/cai.Rd
@@ -0,0 +1,95 @@
+\name{cai}
+\alias{cai}
+\title{Codon Adaptation Index}
+\description{
+ The Codon Adaptation Index (Sharp and Li 1987) is the most popular
+ index of gene expressivity with about 1000 citations 20 years after its
+ publication. Its values range from 0 (low) to 1 (high). The implementation
+ here is intended to work exactly as in the program \code{codonW} written by
+ John Peden during his PhD thesis under the supervision of P.M. Sharp.
+}
+\usage{
+ cai(seq, w, numcode = 1, zero.threshold = 0.0001, zero.to = 0.01)
+}
+\arguments{
+ \item{seq}{a coding sequence as a vector of single characters}
+ \item{w}{a vector for the relative adaptiveness of each codon}
+ \item{numcode}{the genetic code number as in \code{\link{translate}}}
+ \item{zero.threshold}{a value in \code{w} below this threshold is
+ considered as zero}
+ \item{zero.to}{a value considered as zero in \code{w} is forced to
+ this value. The default is from Bulmer (1988).}
+}
+\details{
+Adapted from the documentation of the CAI function in the
+program \code{codonW} written by John Peden:
+CAI is a measurement
+of the relative adaptiveness of the codon usage of a gene towards the
+codon usage of highly expressed genes. The relative adaptiveness (w) of
+each codon is the ratio of the usage of each codon, to that of the most
+abundant codon for the same amino acid. The CAI
+index is defined as the geometric mean of these relative adaptiveness
+values. Non-synonymous codons and termination codons (genetic code
+dependent) are excluded. To aid computation, the CAI is calculated
+using a natural log summation. To prevent a codon from having a relative
+adaptiveness value of zero, which could result in a CAI of zero,
+codons with a fitness of zero (<.0001) are adjusted to 0.01.
+}
+\value{
+ A single numerical value for the CAI.
+}
+\references{
+ Sharp, P.M., Li, W.-H. (1987) The codon adaptation index - a
+ measure of directional synonymous codon usage bias, and its
+ potential applications.
+ \emph{Nucleic Acids Research}, \bold{15}:1281-1295.
+
+ Bulmer, M. (1988).
+ Are codon usage patterns in unicellular organisms determined by
+ selection-mutation balance?
+ \emph{Journal of Evolutionary Biology}, \bold{1}:15-26.
+
+ Peden, J.F. (1999)
+ Analysis of codon usage.
+ PhD Thesis, University of Nottingham, UK.
+
+ The program \code{codonW} used here for comparison is available at
+ \url{http://codonw.sourceforge.net/} under a GPL licence.
+
+ \code{citation("seqinr")}.
+
+}
+\seealso{
+ \code{\link{caitab}} for some \code{w} values from \code{codonW}.
+ \code{\link{uco}} for codon usage tabulation.
+}
+\author{J.R. Lobry}
+\examples{
+#
+# How to reproduce the results obtained with the C program codonW
+# version 1.4.4 written by John Peden. We use here the "input.dat"
+# test file from codonW (Saccharomyces cerevisiae).
+#
+ inputdatfile <- system.file("sequences/input.dat", package = "seqinr")
+ input <- read.fasta(file = inputdatfile) # read the FASTA file
+#
+# Import results obtained with codonW
+#
+ scucofile <- system.file("sequences/scuco.txt", package = "seqinr")
+ scuco.res <- read.table(scucofile, header = TRUE) # read codonW result file
+#
+# Use w for Saccharomyces cerevisiae
+#
+ data(caitab)
+ w <- caitab$sc
+#
+# Compute CAI and compare results:
+#
+ cai.res <- sapply(input, cai, w = w)
+ plot(cai.res, scuco.res$CAI,
+ main = "Comparison of seqinR and codonW results",
+ xlab = "CAI from seqinR",
+ ylab = "CAI from codonW",
+ las = 1)
+ abline(c(0,1))
+}
diff --git a/man/caitab.Rd b/man/caitab.Rd
new file mode 100644
index 0000000..cee4656
--- /dev/null
+++ b/man/caitab.Rd
@@ -0,0 +1,73 @@
+\name{caitab}
+\alias{caitab}
+\docType{data}
+\title{Codon Adaptation Index (CAI) w tables}
+\description{
+Information about a preferred set of codons for highly expressed genes
+in three species.
+}
+\usage{data(caitab)}
+\format{
+ A data frame with 64 rows for the codons and the following 3 columns:
+ \describe{
+ \item{ec}{\emph{Escherichia coli}}
+ \item{bs}{\emph{Bacillus subtilis}}
+ \item{sc}{\emph{Saccharomyces cerevisiae}}
+ }
+}
+\details{
+ Codons are given by \code{row.names(caitab)}.
+}
+\source{
+The data were hard-coded in
+the C program codonW version 1.4.4 written by John Peden
+available at \url{http://codonw.sourceforge.net/}. The data
+are from the file \code{codonW.h}.
+According to this source file, there was no reference for
+\emph{Escherichia coli} and \emph{Bacillus subtilis} and the
+reference for \emph{Saccharomyces cerevisiae} was Sharp
+and Cowe (1991).
+
+It turns out that the data for \emph{Escherichia coli} and
+\emph{Saccharomyces cerevisiae} are identical to table 1
+in Sharp and Li (1987) where the missing values for the stop
+codons are represented here by zeros. All codons were documented
+by at least one count in both datasets.
+
+The data for \emph{Bacillus subtilis} are from table 2 in Shields
+and Sharp (1987). Missing values for stop codons are represented
+as previously by zeros, missing values for single-box amino-acids
+are represented by 1 here. Note that some codons were undocumented
+in this dataset and that a 0.5 value in absolute frequencies was
+already forced to avoid zeros. It is therefore impossible to use
+directly these data to obtain the exact expected CAI values as documented
+in \code{\link{cai}} because of overlapping with documented codons.
+
+}
+\references{
+ Sharp, P.M., Li, W.-H. (1987) The codon adaptation index - a
+ measure of directional synonymous codon usage bias, and its
+ potential applications.
+ \emph{Nucleic Acids Research}, \bold{15}:1281-1295.
+
+ Shields, D.C., Sharp, P.M. (1987) Synonymous codon usage in \emph{Bacillus subtilis}
+ reflects both traditional selection and mutational biases.
+ \emph{Nucleic Acids Research}, \bold{15}:8023-8040.
+
+ Sharp, P. M., Cowe, E. (1991).
+ Synonymous codon usage in \emph{Saccharomyces cerevisiae}.
+ \emph{Yeast}, \bold{7}:657-678.
+
+ Peden, J.F. (1999)
+ Analysis of codon usage.
+ PhD Thesis, University of Nottingham, UK.
+
+ \code{citation("seqinr")}
+}
+\seealso{
+ \code{\link{cai}} for an example using this dataset to compute CAI values.
+}
+\examples{
+ data(caitab)
+}
+\keyword{datasets}
diff --git a/man/chargaff.Rd b/man/chargaff.Rd
new file mode 100644
index 0000000..9aefd15
--- /dev/null
+++ b/man/chargaff.Rd
@@ -0,0 +1,131 @@
+\name{chargaff}
+\alias{chargaff}
+\docType{data}
+\title{Base composition in ssDNA for 7 bacterial DNA}
+\description{
+Long before the genomic era, it was possible to get some data
+for the global composition of single-stranded DNA chromosomes
+by direct chemical analyses. These data are from Chargaff's lab
+and give the base composition of the L (Light) strand for
+7 bacterial chromosomes.
+}
+\usage{data(chargaff)}
+\format{
+ A data frame with 7 observations on the following 4 variables.
+ \describe{
+ \item{[A]}{frequencies of A bases in percent}
+ \item{[G]}{frequencies of G bases in percent}
+ \item{[C]}{frequencies of C bases in percent}
+ \item{[T]}{frequencies of T bases in percent}
+ }
+}
+\details{
+Data are from Table 2 in Rudner \emph{et al.} (1969) for the
+L-strand. Data for \emph{Bacillus subtilis} were taken from
+a previous paper: Rudner \emph{et al.} (1968). This is in
+fact the average value observed for two different strains
+of \emph{B. subtilis}: strain W23 and strain Mu8u5u16.\cr
+Denatured chromosomes can be separated by a technique of
+intermittent gradient elution from a column of methylated
+albumin kieselguhr (MAK), into two fractions, designated,
+by virtue of their buoyant densities, as L (light) and H
+(heavy). The fractions can be hydrolyzed and subjected to
+chromatography to determine their global base composition.\cr
+The surprising result is that we have almost exactly A=T
+and C=G in single-stranded DNAs. The second paragraph page
+157 in Rudner \emph{et al.} (1969) says: "Our previous
+work on the complementary strands of \emph{B. subtilis} DNA
+suggested an additional, entirely unexpected regularity,
+namely, the equality in either strand of 6-amino and 6-keto
+nucleotides ( A + C = G + T). This relationship, which
+would normally have been regarded merely as the consequence
+of base-pairing in DNA duplex and would not have been predicted
+as a likely property of a single strand, is shown here to
+apply to all strand specimens isolated from denaturated DNA
+of the AT type (Table 2, preps. 1-4). It cannot yet be said
+to be established for the DNA specimens from the equimolar
+and GC types (nos. 5-7)."
+
+Try \code{example(chargaff)} to mimic figure page 17 in Lobry
+(2000) :
+
+\if{html}{\figure{chargaff.png}{options: width=400}}
+\if{latex}{\figure{chargaff.png}{options: width=12cm}}
+
+
+Note that \code{example(chargaff)} gives more details:
+the red areas correspond to non-allowed values because the sum
+of the four bases frequencies cannot exceed 100\%.
+The white areas correspond to possible values (more exactly
+to the projection from \code{R^4} to the corresponding \code{R^2} planes
+of the region of allowed values).
+The blue lines correspond to the very small subset of allowed
+values for which we have in addition PR2 state, that is
+\code{[A]=[T]} and \code{[C]=[G]}. Remember, these data are for ssDNA!
+}
+
+\source{
+Rudner, R., Karkas, J.D., Chargaff, E. (1968) Separation of
+\emph{B. subtilis} DNA into complementary strands, III. Direct
+Analysis. \emph{Proceedings of the National Academy of Sciences of the United States of America}, \bold{60}:921-922.\cr
+Rudner, R., Karkas, J.D., Chargaff, E. (1969) Separation of microbial deoxyribonucleic acids into complementary strands. \emph{Proceedings of the National Academy of Sciences of the United States of America}, \bold{63}:152-159.\cr
+
+}
+\references{
+Lobry, J.R. (2000) The black hole of symmetric molecular evolution. Habilitation thesis, Université Claude Bernard - Lyon 1. \url{https://pbil.univ-lyon1.fr/members/lobry/articles/HDR.pdf}.
+
+\code{citation("seqinr")}
+}
+
+\examples{
+data(chargaff)
+op <- par(no.readonly = TRUE)
+par(mfrow = c(4,4), mai = rep(0,4), xaxs = "i", yaxs = "i")
+xlim <- ylim <- c(0, 100)
+
+for( i in 1:4 )
+{
+ for( j in 1:4 )
+ {
+ if( i == j )
+ {
+ plot(chargaff[,i], chargaff[,j],t = "n", xlim = xlim, ylim = ylim,
+ xlab = "", ylab = "", xaxt = "n", yaxt = "n")
+ polygon(x = c(0, 0, 100, 100), y = c(0, 100, 100, 0), col = "lightgrey")
+ for( k in seq(from = 0, to = 100, by = 10) )
+ {
+ lseg <- 3
+ segments(k, 0, k, lseg)
+ segments(k, 100 - lseg, k, 100)
+ segments(0, k, lseg, k)
+ segments(100 - lseg, k, 100, k)
+ }
+ string <- paste(names(chargaff)[i],"\n\n",xlim[1],"\% -",xlim[2],"\%")
+ text(x=mean(xlim),y=mean(ylim), string, cex = 1.5)
+ }
+ else
+ {
+ plot(chargaff[,i], chargaff[,j], pch = 1, xlim = xlim, ylim = ylim,
+ xlab = "", ylab = "", xaxt = "n", yaxt = "n", cex = 2)
+ iname <- names(chargaff)[i]
+ jname <- names(chargaff)[j]
+ direct <- function() segments(0, 0, 50, 50, col="blue")
+ invers <- function() segments(0, 50, 50, 0, col="blue")
+ PR2 <- function()
+ {
+ if( iname == "[A]" & jname == "[T]" ) { direct(); return() }
+ if( iname == "[T]" & jname == "[A]" ) { direct(); return() }
+ if( iname == "[C]" & jname == "[G]" ) { direct(); return() }
+ if( iname == "[G]" & jname == "[C]" ) { direct(); return() }
+ invers()
+ }
+ PR2()
+ polygon(x = c(0, 100, 100), y = c(100, 100, 0), col = "pink4")
+ polygon(x = c(0, 0, 100), y = c(0, 100, 0))
+ }
+ }
+}
+# Clean up
+par(op)
+}
+\keyword{datasets}
diff --git a/man/choosebank.Rd b/man/choosebank.Rd
new file mode 100644
index 0000000..58fc91d
--- /dev/null
+++ b/man/choosebank.Rd
@@ -0,0 +1,97 @@
+\name{choosebank}
+\alias{choosebank}
+\title{To select a database structured under ACNUC and located on the web}
+\description{
+ This function allows you to select one of the databases structured under ACNUC and located on the web.
+ Called without arguments, \code{choosebank()}, will return the list of available databases.
+ Then, you can use \code{\link{query}} to make your query and get a list of sequence names.
+ Remote access to ACNUC databases works by opening a socket connection on a port (for example
+ on port number 5558 at pbil.univ-lyon1.fr) and by communicating on this socket following the protocol
+ described in the section \code{references}.
+}
+\usage{
+choosebank(bank = NA, host = "pbil.univ-lyon1.fr", port = 5558, server = FALSE,
+ blocking = TRUE, open = "a+", encoding = "", verbose = FALSE,
+ timeout = 5, infobank = FALSE, tagbank = NA)
+}
+\arguments{
+ \item{bank}{string. The name of the bank. If NA, \code{choosebank} will return the names of all databases known by the server.}
+ \item{host}{string. Host name for port (see \code{\link{socketConnection}})}
+ \item{port}{integer. The TCP port number (see \code{\link{socketConnection}})}
+ \item{server}{logical. Should the socket be a client or a server? (see \code{\link{socketConnection}})}
+ \item{blocking}{logical. (see \code{\link{socketConnection}})}
+ \item{open}{string. A description of how to open the connection (see \code{\link{socketConnection}})}
+ \item{encoding}{string. The name of the encoding to be used. (see \code{\link{socketConnection}})}
+ \item{verbose}{logical. If TRUE, verbose mode is on}
+ \item{timeout}{integer. The timeout in seconds for \code{socketConnection}. Default 5 seconds.}
+ \item{infobank}{logical. If \code{infobank} is TRUE and \code{bank} is \code{NA}, a data.frame
+ with all database information will be returned}
+ \item{tagbank}{string. If \code{bank} is \code{NA} and \code{tagbank} is documented, the names
+ of special-purpose databases are returned. Currently allowed values are TP
+ for frozen databases (TP is an acronym for "travaux pratiques" which means practicals
+ in French, these databases are useful mainly for teaching so as to have stable results),
+ TEST for test databases, and DEV for databases under development (unstable).}
+}
+\details{
+ When called without arguments, \code{choosebank()} returns a list of all the database names known
+ by the server, as a vector of strings. When called with \code{choosebank(infobank = TRUE)}, a data.frame
+ with more information is returned.
+}
+\value{
+When called with a regular bank name, an (invisible) list with 6 components:
+
+ \item{socket}{ an object of class \code{socket} }
+ \item{bankname}{ the name of the bank }
+ \item{banktype}{ the type of the bank (GENBANK, EMBL, SWISSPROT, NBRF)}
+ \item{totseqs}{ the total number of sequences present in the opened database }
+ \item{totspecs}{ the total number of species present in the opened database }
+ \item{totkeys}{ the total number of keywords present in the opened database }
+
+When called with bank = NA:
+
+ \item{ }{ A vector of all available bank names.}
+
+When called with bank = NA and infobank = TRUE, a data.frame with three columns:
+
+ \item{bank}{ The name of the bank. }
+ \item{status}{ The bank status (on/off). }
+ \item{info}{ Short description of bank with last release date. }
+
+}
+\references{
+For more information about the socket communication protocol with ACNUC please see \url{http://doua.prabi.fr/databases/acnuc/remote_acnuc.html}.
+To get the release date and content of all the databases located at the pbil, please look at the following url: \url{http://doua.prabi.fr/search/releases}\cr
+Gouy, M., Milleret, F., Mugnier, C., Jacobzone, M., Gautier,C. (1984) ACNUC: a nucleic acid sequence data base and analysis system.
+\emph{Nucl. Acids Res.}, \bold{12}:121-127.\cr
+Gouy, M., Gautier, C., Attimonelli, M., Lanave, C., Di Paola, G. (1985)
+ACNUC - a portable retrieval system for nucleic acid sequence databases:
+logical and physical designs and usage.
+\emph{Comput. Appl. Biosci.}, \bold{3}:167-172.\cr
+Gouy, M., Gautier, C., Milleret, F. (1985) System analysis and nucleic acid sequence banks.
+\emph{Biochimie}, \bold{67}:433-436.\cr
+
+\code{citation("seqinr")}
+}
+\note{
+The invisible list returned when a database is opened is stored in the variable
+\code{banknameSocket} in the global environment.
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{\code{\link{where.is.this.acc}} if you have a sequence accession number but you
+don't know which database to open, \code{\link{query}} to make a query when a database
+is opened, \code{\link{connection}}, \code{\link{socketConnection}} }
+\examples{
+ \dontrun{# Need internet connection
+ # Show available databases:
+ choosebank()
+ # Show frozen databases:
+ choosebank(tag = "TP")
+ # Select a database:
+ choosebank("emblTP", tag = "TP")
+ # Do something with the database:
+ myseq <- gfrag("LMFLCHR36", start = 1, length = 30)
+ stopifnot(myseq == "cgcgtgctggcggcaatgaagcgttcgatg")
+ # Close the database:
+ closebank()}
+}
+\keyword{ manip }
diff --git a/man/circle.Rd b/man/circle.Rd
new file mode 100644
index 0000000..0a268f4
--- /dev/null
+++ b/man/circle.Rd
@@ -0,0 +1,45 @@
+\name{circle}
+\alias{circle}
+\title{Draws a circle}
+\description{Draws a circle or an arc-circle on the current graphic device}
+\usage{
+circle(x = 0, y = 0, r = 1, theta = c(0, 360), n = 100, ...)
+}
+\arguments{
+ \item{x}{x coordinate for the center of the circle}
+ \item{y}{y coordinate for the center of the circle}
+ \item{r}{radius of the circle}
+ \item{theta}{start and stop angle}
+ \item{n}{number of points for polygon object}
+ \item{\dots}{arguments passed to \code{\link{polygon}}}
+}
+\value{none}
+\author{J.R. Lobry}
+
+\seealso{\code{\link{polygon}}}
+\examples{
+par(mfrow = c(2, 2), mar = c(0,0,2,0))
+setup <- function(){
+ plot.new()
+ plot.window(xlim = c(-1,1), ylim = c(-1,1), asp = 1)
+}
+
+setup()
+circle(col = "lightblue")
+title(main = "theta = c(0, 360)")
+
+setup()
+circle(col = "lightblue", theta = c(0, 270))
+title(main = "theta = c(0, 270)")
+
+setup()
+circle(col = "lightblue", theta = c(-90, 180))
+title(main = "theta = c(-90, 180)")
+
+setup()
+n <- 20
+for(i in seq(0, 360, length = n)){
+ circle(col = "lightblue", theta = c(i, i+360/(2*n)))
+}
+title(main = "many thetas")
+}
diff --git a/man/closebank.Rd b/man/closebank.Rd
new file mode 100644
index 0000000..a96e630
--- /dev/null
+++ b/man/closebank.Rd
@@ -0,0 +1,27 @@
+\name{closebank}
+\alias{closebank}
+\title{ To close a remote ACNUC database }
+\description{
+ This function tries to close a remote ACNUC database.
+}
+\usage{
+closebank(socket = autosocket(), verbose = FALSE)
+}
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{verbose}{ Logical. If TRUE, verbose mode is on }
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry }
+
+\seealso{ \code{\link{choosebank}}}
+\examples{
+ \dontrun{# Need internet connection
+ choosebank("emblTP")
+ closebank()
+ }
+}
+\keyword{ manip }
diff --git a/man/clustal.Rd b/man/clustal.Rd
new file mode 100644
index 0000000..ea588f5
--- /dev/null
+++ b/man/clustal.Rd
@@ -0,0 +1,13 @@
+\name{clustal}
+ \docType{data}
+ \alias{clustal}
+ \title{Example of results obtained after a call to read.alignment}
+ \description{This data set gives an example of a protein alignment obtained after a call to the function read.alignment on an alignment file in "clustal" format.}
+ \usage{clustal}
+ \format{A List of class alignment}
+ \source{http://www.clustal.org/}
+ \references{
+ Thompson, J.D., Higgins D.G., Gibson T.J. (1994) \emph{CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position specific gap penalties and weight matrix choice}.
+ Nucleic Acids Res. 22(22):4673-80.
+ }
+ \keyword{datasets}
diff --git a/man/col2alpha.Rd b/man/col2alpha.Rd
new file mode 100644
index 0000000..b4cca4e
--- /dev/null
+++ b/man/col2alpha.Rd
@@ -0,0 +1,49 @@
+\name{col2alpha}
+\Rdversion{1.1}
+\alias{col2alpha}
+\title{To use a standard color with an alpha transparency channel}
+\description{
+Takes as input a standard R color and an alpha value to return
+its rgb coding.
+}
+\usage{
+col2alpha(color, alpha = 0.5)
+}
+\arguments{
+ \item{color}{A standard R color as in \code{\link{colors}}.}
+ \item{alpha}{An alpha transparency value in the interval [0,1].}
+}
+\value{same as in \code{\link{rgb}}.}
+\author{J.R. Lobry}
+\seealso{\code{\link{colors}}, \code{\link{col2rgb}}, \code{\link{rgb}}.}
+\examples{
+#
+# Need alpha transparency channel
+#
+par(mar = c(0, 0, 2, 2)+0.1, oma = c(0, 0, 2, 0), mfrow = c(3,2))
+for(testcol in c("blue", "red", "green", "yellow", "purple", "darkgreen")){
+ plot(0,0, type="n", xlim=0:1, ylim = 0:1, axes = FALSE, xlab = "", ylab = "", main = testcol)
+ n <- 11
+ for(i in seq(0, 1, length = n)){
+ col <- col2alpha(testcol, i)
+ rect(i, 0, i + 1/n, 1, col = col, border = "black", xpd = NA)
+ text(i+0.5/n, 0.5, round(i,2), xpd = NA)
+ }
+}
+mtext("Effect of alpha on some colors\nNote: need alpha transparency channel",
+ side = 3, outer = TRUE)
+#
+# The subtractive color scheme:
+#
+par(mar = c(0,0,3,0))
+plot.new()
+plot.window(xlim = c(-1.5, 1.5), ylim = c(-1,1.75), asp = 1)
+n <- 10
+alpha <- 1/n
+for(i in 1:(2*n)){
+ circle(x = -0.5, y = 0, col = col2alpha("yellow", alpha))
+ circle(x = 0.5, y = 0, col = col2alpha("cyan", alpha))
+ circle(x = 0, y = 3/4, col = col2alpha("magenta", alpha))
+}
+title("Substractive color scheme\nNote: need alpha transparency channel")
+}
diff --git a/man/comp.Rd b/man/comp.Rd
new file mode 100644
index 0000000..ef2cafe
--- /dev/null
+++ b/man/comp.Rd
@@ -0,0 +1,64 @@
+\name{comp}
+\alias{comp}
+\title{ complements a nucleic acid sequence }
+\description{
+ Complements a sequence, for instance if the sequence is
+ \code{"a","c","g","t"} it returns \code{"t","g","c","a"}.
+ This is not the reverse complementary strand. This function
+ can handle ambiguous bases if required.
+}
+\usage{
+comp(seq, forceToLower = TRUE, ambiguous = FALSE)
+}
+\arguments{
+ \item{seq}{ a DNA sequence as a vector of single chars }
+ \item{forceToLower}{ if TRUE characters in \code{seq} are forced to lower case}
+ \item{ambiguous}{ if TRUE ambiguous bases in \code{seq} are handled}
+}
+\value{
+ a vector of characters which is the complement of the sequence,
+ not the reverse complementary strand. Undefined values are
+ returned as NA.
+}
+\author{D. Charif, J.R. Lobry}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{ Because ssDNA sequences are always written in the 5'->3'
+direction, use rev(comp(seq)) to get the reverse complementary
+strand (see \code{\link{rev}}).
+}
+\examples{
+##
+## Show that comp() does *not* return the reverse complementary strand:
+##
+
+c2s(comp(s2c("aaaattttggggcccc")))
+
+##
+## Show how to get the reverse complementary strand:
+##
+
+c2s(rev(comp(s2c("aaaattttggggcccc"))))
+
+##
+## Show what happens with non allowed values:
+##
+
+c2s(rev(comp(s2c("aaaaXttttYggggZcccc"))))
+
+##
+## Show what happens with ambiguous bases:
+##
+
+allbases <- s2c("abcdghkmstvwn")
+comp(allbases) # NA are produced
+comp(allbases, ambiguous = TRUE) # No more NA
+
+##
+## Routine sanity checks:
+##
+
+stopifnot(identical(comp(allbases, ambiguous = TRUE), s2c("tvghcdmksabwn")))
+stopifnot(identical(comp(c("A", "C", "G", "T"), forceToLower = FALSE), c("T", "G", "C", "A")))
+}
diff --git a/man/computePI.Rd b/man/computePI.Rd
new file mode 100644
index 0000000..048080e
--- /dev/null
+++ b/man/computePI.Rd
@@ -0,0 +1,41 @@
+\name{computePI}
+\alias{computePI}
+\title{To Compute the Theoretical Isoelectric Point}
+\description{
+ This function calculates the theoretical isoelectric point of a protein. Isoelectric point is the pH at which the protein has a neutral charge.
+This estimate does not account for the post-translational modifications.
+}
+\usage{
+computePI(seq)
+}
+\arguments{
+ \item{seq}{ Protein sequence as a vector of single chars in upper case}
+}
+\value{
+ The theoretical isoelectric point (pI) as a numerical vector of length one.
+}
+\references{
+The algorithm is the same as the one which is implemented at the following url:
+\url{http://www.expasy.org/tools/pi_tool-doc.html} but with many trials
+in case of convergence failure of the non-linear regression procedure.
+\code{citation("seqinr")}
+ }
+\author{D. Charif, J.R. Lobry}
+\note{
+Protein pI is calculated using pK values of amino acids described in Bjellqvist et al. See also \code{SEQINR.UTIL} for more details.
+ }
+\seealso{\code{\link{SEQINR.UTIL}} }
+\examples{
+#
+# Simple sanity check with all 20 amino-acids in one-letter code alphabetical order:
+#
+prot <- s2c("ACDEFGHIKLMNPQRSTVWY")
+stopifnot(all.equal(computePI(prot), 6.78454))
+#
+# Read a protein sequence in a FASTA file and then compute its pI :
+#
+myProts <- read.fasta(file = system.file("sequences/seqAA.fasta",
+ package = "seqinr"), seqtype = "AA")
+computePI(myProts[[1]]) # Should be 8.534902
+}
+\keyword{manip}
diff --git a/man/consensus.Rd b/man/consensus.Rd
new file mode 100644
index 0000000..4afbdea
--- /dev/null
+++ b/man/consensus.Rd
@@ -0,0 +1,103 @@
+\name{consensus}
+\alias{consensus}
+\alias{con}
+\title{Consensus and profiles for sequence alignments}
+\description{
+ This function returns a consensus using various methods (see details)
+ or a profile from a sequence alignment.
+}
+\usage{
+consensus(matali, method = c( "majority", "threshold", "IUPAC", "profile"),
+ threshold = 0.60, warn.non.IUPAC = FALSE, type = c("DNA", "RNA"))
+con(matali, method = c( "majority", "threshold", "IUPAC", "profile"),
+ threshold = 0.60, warn.non.IUPAC = FALSE, type = c("DNA", "RNA"))
+}
+\arguments{
+ \item{matali}{an object of class \code{alignment} as returned by
+ \code{\link{read.alignment}}, or a matrix of characters.}
+ \item{method}{select the method to use, see details.}
+ \item{threshold}{for the \code{threshold} method, a numeric value between 0 and 1
+ indicating the minimum relative frequency for a character to be returned
+ as the consensus character. If none, NA is returned.}
+ \item{warn.non.IUPAC}{for the \code{IUPAC} method this argument is passed
+ to \code{\link{bma}} with a default value set to FALSE to avoid warnings due
+ to gap characters in the alignment.}
+ \item{type}{for the \code{IUPAC} method this argument is passed
+ to \code{\link{bma}}.}
+}
+\details{
+ \describe{
+ \item{"majority"}{The character with the highest frequency is returned as the
+ consensus character.}
+
+ \item{"threshold"}{As above but in addition the character relative frequency
+ must be higher than the value controlled by the \code{threshold} argument.
+ If none, NA is returned.}
+
+ \item{"IUPAC"}{Makes sense only for nucleic acid sequences (DNA or RNA).
+ The consensus character is defined if possible by an IUPAC symbol by
+ function \code{\link{bma}}. If this is not possible, when there is a gap
+ character for instance, NA is returned.}
+
+ \item{"profile"}{With this method a matrix with the count of each possible
+ character at each position is returned.}
+ }
+\code{con} is a short form for \code{consensus}.
+
+}
+\value{
+ Either a vector of single characters with possible NA or a matrix with
+ the method \code{profile}.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+
+\seealso{See \code{\link{read.alignment}} to import alignment from files.}
+
+\examples{
+#
+# Read 5 aligned DNA sequences at 42 sites:
+#
+ phylip <- read.alignment(file = system.file("sequences/test.phylip",
+ package = "seqinr"), format = "phylip")
+#
+# Show data in a matrix form:
+#
+ (matali <- as.matrix(phylip))
+#
+# With the majority rule:
+#
+ res <- consensus(phylip)
+ stopifnot(c2s(res) == "aaaccctggccgttcagggtaaaccgtggccgggcagggtat")
+#
+# With a threshold:
+#
+ res.thr <- consensus(phylip, method = "threshold")
+ res.thr[is.na(res.thr)] <- "." # change NA into dots
+# stopifnot(c2s(res.thr) == "aa.c..t.gc.gtt..g..t.a.cc..ggccg.......ta.")
+ stopifnot(c2s(res.thr) == "aa.cc.tggccgttcagggtaaacc.tggccgg.cagggtat")
+#
+# With an IUPAC summary:
+#
+ res.iup <- consensus(phylip, method = "IUPAC")
+ stopifnot(c2s(res.iup) == "amvsbnkkgcmkkkmmgsktrmrssndkgcmrkdmmvskyaw")
+ # replace 3 and 4-fold symbols by dots:
+ res.iup[match(res.iup, s2c("bdhvn"), nomatch = 0) > 0] <- "."
+ stopifnot(c2s(res.iup) == "am.s..kkgcmkkkmmgsktrmrss..kgcmrk.mm.skyaw")
+#
+# With a profile method:
+#
+ (res <- consensus(phylip, method = "profile"))
+#
+# Show the connection between the profile and some consensus:
+#
+ bxc <- barplot(res, col = c("green", "blue", "orange", "white", "red"), border = NA,
+ space = 0, las = 2, ylab = "Base count",
+ main = "Profile of a DNA sequence alignment",
+ xlab = "sequence position", xaxs = "i")
+
+ text(x = bxc, y = par("usr")[4],lab = res.thr, pos = 3, xpd = NA)
+ text(x = bxc, y = par("usr")[1],lab = res.iup, pos = 1, xpd = NA)
+}
diff --git a/man/count.Rd b/man/count.Rd
new file mode 100644
index 0000000..8bc234b
--- /dev/null
+++ b/man/count.Rd
@@ -0,0 +1,95 @@
+\name{count}
+\alias{count}
+\title{Composition of dimer/trimer/etc oligomers}
+\description{
+ Counts the number of times dimer/trimer/etc oligomers occur in a
+ sequence. Note that the oligomers are overlapping by default.
+}
+\usage{
+count(seq, wordsize, start = 0, by = 1,
+ freq = FALSE, alphabet = s2c("acgt"), frame = start)
+}
+\arguments{
+ \item{seq}{a vector of single characters.}
+ \item{wordsize}{an integer giving the size of word (n-mer) to count.}
+ \item{start}{an integer (0, 1, 2,...) giving the starting
+ position to consider in the sequence. The default value 0 means that
+ we start at the first nucleotide in the sequence.}
+ \item{by}{an integer defaulting to 1 for the window step.}
+ \item{freq}{if TRUE, word relative frequencies (summing to 1) are returned instead of counts}
+ \item{alphabet}{a vector of single characters used to build the oligomer set.}
+ \item{frame}{synonymous for start}
+}
+\details{
+ \code{count} counts the occurrence of all words by moving a window of
+ length \code{wordsize}. The window step is controlled by the argument \code{by}.
+\code{start} controls the starting position in the sequence for the count.
+}
+\value{
+ This function returns a \code{\link{table}} whose \code{\link{dimnames}} are all the possible
+ oligomers. All oligomers are returned, even if absent from
+ the sequence.
+}
+\author{D. Charif, J.R. Lobry with suggestions from Gabriel Valiente, Stefanie Hartmann and Christian Gautier}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{table}} for the class of the returned object. See \code{\link{rho}} and
+ \code{\link{zscore}} for dinucleotide statistics.}
+\examples{
+a <- s2c("acgggtacggtcccatcgaa")
+##
+## To count dinucleotide occurrences in sequence a:
+##
+count(a, word = 2)
+##
+## To count trinucleotide occurrences in sequence a, with start = 2:
+##
+count(a, word = 3, start = 2)
+##
+## To count dinucleotide relative frequencies in sequence a:
+##
+count(a, word = 2, freq = TRUE)
+##
+## To count dinucleotides in codon positions III-I in a coding sequence:
+##
+alldinuclIIIpI <- s2c("NNaaNatNttNtgNgtNtcNctNtaNagNggNgcNcgNgaNacNccNcaNN")
+resIIIpI <- count(alldinuclIIIpI, word = 2, start = 2, by = 3)
+stopifnot(all( resIIIpI == 1))
+##
+## Simple sanity check:
+##
+#alldinucl <- "aattgtctaggcgacca"
+#stopifnot(all(count(s2c(alldinucl), 2) == 1))
+#alldiaa <- "aaxxzxbxvxyxwxtxsxpxfxmxkxlxixhxgxexqxcxdxnxrxazzbzvzyzwztzszpzfzmzkzlzizhzgzezqzczdznz
+#rzabbvbybwbtbsbpbfbmbkblbibhbgbebqbcbdbnbrbavvyvwvtvsvpvfvmvkvlvivhvgvevqvcvdvnvrvayywytysypyfymyky
+#lyiyhygyeyqycydynyryawwtwswpwfwmwkwlwiwhwgwewqwcwdwnwrwattstptftmtktltithtgtetqtctdtntrtasspsfsmsks
+#lsishsgsesqscsdsnsrsappfpmpkplpiphpgpepqpcpdpnprpaffmfkflfifhfgfefqfcfdfnfrfammkmlmimhmgmemqmcmdmnm
+#rmakklkikhkgkekqkckdknkrkallilhlglelqlcldlnlrlaiihigieiqicidiniriahhghehqhchdhnhrhaggegqgcgdgngrgae
+#eqecedenereaqqcqdqnqrqaccdcncrcaddndrdannrnarra"
+#stopifnot(all(count(s2c(alldiaa), 2, alphabet = s2c("arndcqeghilkmfpstwyvbzx")) == 1))
+##
+## Example with dinucleotide count in the complete Human mitochondrion genome:
+##
+humanMito <- read.fasta(file = system.file("sequences/humanMito.fasta", package = "seqinr"))
+##
+## Get the dinucleotide count:
+##
+dinu <- count(humanMito[[1]], 2)
+##
+## Put the results in a 4 X 4 array:
+##
+dinu2 <- dinu
+dim(dinu2) <- c(4, 4)
+nucl <- s2c("ACGT")
+dimnames(dinu2) <- list(paste(nucl, "-3\'", sep = ""), paste("5\'-", nucl, sep = ""))
+##
+## Show that CpG and GpT dinucleotides are depleted:
+##
+mosaicplot(t(dinu2), shade = TRUE,
+ main = "Dinucleotide XpY frequencies in the Human\nmitochondrion complete genome",
+ xlab = "First nucleotide: Xp",
+ ylab = "Second nucleotide: pY", las = 1, cex = 1)
+mtext("Note the depletion in CpG and GpT dinucleotides", side = 1, line = 3)
+}
+\keyword{ manip }
diff --git a/man/countfreelists.Rd b/man/countfreelists.Rd
new file mode 100644
index 0000000..c03171b
--- /dev/null
+++ b/man/countfreelists.Rd
@@ -0,0 +1,41 @@
+\name{countfreelists}
+\alias{countfreelists}
+\alias{cfl}
+\title{The number of free lists available and annotation lines in an ACNUC server}
+\description{
+Returns the number of free lists available and the list of names of annotation lines in the
+opened ACNUC database.
+}
+\usage{
+countfreelists(socket = autosocket())
+cfl(socket = autosocket())
+}
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+
+\value{
+a list with the following 2 components:
+
+ \item{free}{numeric. The number of free lists}
+ \item{annotlines}{vector of strings. Names of annotation lines}
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{ # Need internet connection
+ choosebank("emblTP")
+ (rescountfreelists <- countfreelists())
+ stopifnot(all(rescountfreelists$annotlines ==
+ c("ALL", "AC", "PR", "DT", "KW", "OS", "OC",
+ "OG", "RN", "RC", "RP", "RX", "RG", "RA", "RT", "RL", "DR",
+ "CC", "AH", "AS", "FH", "FT", "CO", "SQ", "SEQ")))
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/countsubseqs.Rd b/man/countsubseqs.Rd
new file mode 100644
index 0000000..ccff847
--- /dev/null
+++ b/man/countsubseqs.Rd
@@ -0,0 +1,39 @@
+\name{countsubseqs}
+\alias{countsubseqs}
+\alias{css}
+\title{Number of subsequences in an ACNUC list}
+\description{
+Returns the number of subsequences in the ACNUC list of rank \code{lrank}.
+}
+\usage{
+countsubseqs(lrank, socket = autosocket())
+css(lrank, socket = autosocket())
+}
+\arguments{
+ \item{lrank}{the rank of the ACNUC list to consider.}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+
+\value{
+Numeric.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}}, \code{\link{glr}} to
+get a list rank from its name.}
+\examples{
+\dontrun{ # Need internet connection
+ choosebank("emblTP")
+ mylist<-query("mylist", "N=@", virtual = TRUE) # select all (seqs + subseqs)
+ mylist$nelem # 14138094 seqs + subseqs
+ stopifnot(mylist$nelem == 14138094)
+ css(glr("mylist")) # 1604500 subsequences only
+ stopifnot(css(glr("mylist")) == 1604500)
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/crelistfromclientdata.Rd b/man/crelistfromclientdata.Rd
new file mode 100644
index 0000000..0cf50bf
--- /dev/null
+++ b/man/crelistfromclientdata.Rd
@@ -0,0 +1,93 @@
+\name{crelistfromclientdata}
+\alias{crelistfromclientdata}
+\alias{clfcd}
+\title{To create on server an ACNUC list from data lines sent by client}
+\description{
+This function is useful if you have a local file with sequence names
+(sequence ID), or sequence accession numbers, or species names, or
+keywords. This allows you to create on the server a list with the
+corresponding items.
+}
+\usage{
+crelistfromclientdata(listname, file, type,
+ socket = autosocket(), invisible = TRUE,
+ verbose = FALSE, virtual = FALSE)
+clfcd(listname, file, type, socket = autosocket(),
+ invisible = TRUE, verbose = FALSE, virtual = FALSE)
+}
+
+\arguments{
+ \item{listname}{The name of the list as a quoted string of chars}
+ \item{file}{The local file name}
+ \item{type}{Could be one of "SQ", "AC", "SP", "KW", see examples}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{invisible}{if \code{FALSE}, the result is returned visibly.}
+ \item{verbose}{if \code{TRUE}, verbose mode is on}
+ \item{virtual}{if \code{TRUE}, no attempt is made to retrieve the information about
+ all the elements of the list. In this case, the \code{req} component of the list is set to
+ \code{NA}.}
+}
+\details{
+\code{clfcd} is a shortcut for \code{crelistfromclientdata}.
+}
+
+\value{
+The result is directly assigned to the object \code{listname} in the user workspace.
+This is an object of class \code{qaw}, a list with the following 6 components:
+
+ \item{call}{the original call}
+ \item{name}{the ACNUC list name}
+ \item{nelem}{the number of elements (for instance sequences) in the ACNUC list}
+ \item{typelist}{the type of the elements of the list. Could be SQ for a list of
+ sequence names, KW for a list of keywords, SP for a list of species names.}
+ \item{req}{a list of sequence names that fit the required criteria or \code{NA} when
+ called with parameter \code{virtual} is \code{TRUE}}
+ \item{socket}{the socket connection that was used}
+}
+\references{
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{choosebank}},
+ \code{\link{query}}, \code{\link{savelist}} for the reverse operation with
+an ACNUC list of sequences.
+}
+
+\examples{
+ \dontrun{ # Need internet connection
+ choosebank("emblTP")
+ #
+ # Example with a file that contains sequence names:
+ #
+ fileSQ <- system.file("sequences/bb.mne", package = "seqinr")
+ listSQ <- crelistfromclientdata("listSQ", file = fileSQ, type = "SQ")
+ sapply(listSQ$req, getName)
+ #
+ # Example with a file that contains sequence accession numbers:
+ #
+ fileAC <- system.file("sequences/bb.acc", package = "seqinr")
+ listAC <- crelistfromclientdata("listAC", file = fileAC, type = "AC")
+ sapply(listAC$req, getName)
+ #
+ # Example with a file that contains species names:
+ #
+ fileSP <- system.file("sequences/bb.sp", package = "seqinr")
+ listSP <- crelistfromclientdata("listSP", file = fileSP, type = "SP")
+ sapply(listSP$req, getName)
+ #
+ # Example with a file that contains keywords:
+ #
+ fileKW <- system.file("sequences/bb.kwd", package = "seqinr")
+ listKW <- crelistfromclientdata("listKW", file = fileKW, type = "KW")
+ sapply(listKW$req, getName)
+ #
+ # Summary of ACNUC lists:
+ #
+ sapply(alr()$rank, getliststate)
+ closebank()
+ }
+}
+\keyword{utilities}
diff --git a/man/dia.bactgensize.Rd b/man/dia.bactgensize.Rd
new file mode 100644
index 0000000..ee356e6
--- /dev/null
+++ b/man/dia.bactgensize.Rd
@@ -0,0 +1,83 @@
+\name{dia.bactgensize}
+\alias{dia.bactgensize}
+\title{ Distribution of bacterial genome size from GOLD }
+\description{
+This function tries to download the last update of the GOLD
+(Genomes OnLine Database) to extract bacterial genome sizes
+when available. The histogram and the default density()
+output are produced. Optionally, a maximum likelihood estimate
+of a superposition of two or three normal distributions is
+also represented.
+}
+\usage{
+dia.bactgensize(fit = 2, p = 0.5, m1 = 2000, sd1 = 600, m2 = 4500,
+ sd2 = 1000, p3 = 0.05, m3 = 9000, sd3 = 1000, maxgensize = 20000,
+ source = c("ftp://pbil.univ-lyon1.fr/pub/seqinr/data/goldtable15Dec07.txt",
+ "http://www.genomesonline.org/DBs/goldtable.txt"))
+}
+\arguments{
+ \item{fit}{ integer value. If \code{fit == 0} no normal fit
+is produced, if \code{fit == 2} try to fit a superposition of
+two normal distributions, if \code{fit == 3} try to fit a
+superposition of three normal distributions.
+ }
+ \item{p}{ initial guess for the proportion of the first population. }
+ \item{m1}{ initial guess for the mean of the first population. }
+ \item{sd1}{ initial guess for the standard deviation of the first population. }
+ \item{m2}{ initial guess for the mean of the second population. }
+ \item{sd2}{initial guess for the standard deviation of the second population. }
+ \item{p3}{ initial guess for the proportion of the third population. }
+ \item{m3}{ initial guess for the mean of the third population. }
+ \item{sd3}{initial guess for the standard deviation of the third population. }
+ \item{maxgensize}{maximum admissible value in bp for a bacterial genome size:
+ only values less than or equal to this threshold are considered.}
+ \item{source}{ the file with raw data. By default a local (outdated) copy is used.}
+}
+\value{
+ An invisible dataframe with three components:
+
+ \item{genus }{genus name}
+ \item{species }{species names}
+ \item{gs }{genome size in Kb}
+
+}
+\references{
+Please cite the following references when using data from GOLD:
+
+Kyrpides, N.C. (1999) Genomes OnLine Database (GOLD 1.0): a monitor
+of complete and ongoing genome projects world-wide.
+\emph{Bioinformatics}, \bold{15}:773-774.\cr
+
+Bernal, A., Ear, U., Kyrpides, N. (2001) Genomes OnLine Database (GOLD):
+a monitor of genome projects world-wide.
+\emph{Nucleic Acids Research}, \bold{29}:126-127.\cr
+
+Liolios, K., Tavernarakis, N., Hugenholtz, P., Kyrpides, N.C. (2006)
+The Genomes On Line Database (GOLD) v.2: a monitor of genome
+projects worldwide.
+\emph{Nucleic Acids Research}, \bold{34}:D332-D334.\cr
+
+Liolios, K., Mavrommatis, K., Tavernarakis, N., Kyrpides, N.C. (2008)
+The Genomes On Line Database (GOLD) in 2007: status of genomic
+and metagenomic projects and their associated metadata.
+\emph{Nucleic Acids Research},
+\bold{in press}:D000-D000.\cr
+
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{density}} }
+\examples{
+ \dontrun{# Need internet connection
+#
+# With a local outdated copy from GOLD:
+#
+ dia.bactgensize()
+#
+# With last GOLD data:
+#
+ # The URL is no longer accessible.
+ # dia.bactgensize(source = "http://www.genomesonline.org/DBs/goldtable.txt")
+ }
+}
+\keyword{ utilities }
diff --git a/man/dinucl.Rd b/man/dinucl.Rd
new file mode 100644
index 0000000..a419da2
--- /dev/null
+++ b/man/dinucl.Rd
@@ -0,0 +1,65 @@
+\name{dinucl}
+\alias{dinucl}
+\docType{data}
+\title{ Mean zscore on 242 complete bacterial chromosomes }
+\encoding{latin1}
+\description{
+This dataset contains the mean zscores as computed on all intergenic
+sequences (intergenic) and on all CDS (coding) from 242
+complete bacterial chromosomes (as retrieved from Genome Reviews database on
+June 16, 2005).
+}
+\usage{
+data(dinucl)
+}
+\format{
+ List of two dataframes of 242 chromosomes and 16 dinucleotides: one
+ for intergenic, one for coding sequences.
+\describe{
+ \item{intergenic}{the mean of \code{zscore} computed with the \code{base}
+ model on each intergenic sequence}
+ \item{coding}{the mean of \code{zscore} computed with the \code{codon}
+ model on each coding sequence}
+}
+}
+\references{
+ Palmeira, L., Guéguen, L. and Lobry JR. (2006) UV-targeted dinucleotides
+ are not depleted in light-exposed Prokaryotic genomes.
+ \emph{Molecular Biology and Evolution},
+ \bold{23}:2214-2219.\cr
+ \url{http://mbe.oxfordjournals.org/cgi/reprint/23/11/2214}\cr\cr
+
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{zscore}} }
+\examples{
+data(dinucl)
+par(mfrow = c(2, 2), mar = c(4,4,0.5,0.5)+0.1)
+myplot <- function(x){
+ plot(dinucl$intergenic[, x], dinucl$coding[, x],
+ xlab = "intergenic", ylab = "coding",
+ las = 1, ylim = c(-6, 4),
+ xlim = c(-3, 3), cex = 0)
+ rect(-10,-10,-1.96,10,col="yellow", border = "yellow")
+ rect(1.96,-10,10,10,col="yellow", border = "yellow")
+ rect(-10,-10,10,-1.96,col="yellow", border = "yellow")
+ rect(-10,1.96,10,10,col="yellow", border = "yellow")
+ abline(v=0,lty=3)
+ abline(h=0,lty=3)
+ abline(h=-1.96,lty=2)
+ abline(h=+1.96,lty=2)
+ abline(v=-1.96,lty=2)
+ abline(v=+1.96,lty=2)
+ points(dinucl$intergenic[, x], dinucl$coding[, x], pch = 21,
+ col = rgb(.1,.1,.1,.5), bg = rgb(.5,.5,.5,.5))
+ legend("bottomright", inset = 0.02,
+ legend = paste(substr(x,1,1), "p",
+ substr(x,2,2), " bias", sep = ""), cex = 1.25, bg = "white")
+ box()
+}
+myplot("CT")
+myplot("TC")
+myplot("CC")
+myplot("TT")
+}
+\keyword{ datasets }
diff --git a/man/dist.alignment.Rd b/man/dist.alignment.Rd
new file mode 100644
index 0000000..05e8986
--- /dev/null
+++ b/man/dist.alignment.Rd
@@ -0,0 +1,41 @@
+\name{dist.alignment}
+\alias{dist.alignment}
+\title{ Pairwise Distances from Aligned Protein or DNA/RNA Sequences }
+\description{
+ These functions compute a matrix of pairwise distances from aligned sequences
+ using similarity (Fitch matrix, for protein sequences only) or identity matrix
+ (for protein and DNA sequences).
+ The resulting matrix contains the square root of the pairwise distances.
+ For example, if identity between 2 sequences is 80\%, the matrix will give
+ the square root of (1.0 - 0.8) i.e. 0.4472136.
+
+}
+\usage{
+dist.alignment(x, matrix = c("similarity", "identity"),gap)
+}
+\arguments{
+ \item{x}{an object of class \code{alignment}, as returned by
+ \code{read.alignment} for instance}
+ \item{matrix}{the matrix distance to be used, partial matching allowed }
+ \item{gap}{-optional- with nucleotides, if set to 1, gaps will be counted in the identity measure }
+}
+\value{
+ The distance matrix, object of class \code{dist}, computed by using the
+ specified distance measure.
+}
+\references{
+The reference for the similarity matrix is :\cr
+Fitch, W.M. (1966) An improved method of testing for evolutionary homology.
+\emph{J. Mol. Biol.}, \bold{16}:9-16.\cr
+
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{ \code{\link{read.alignment} } }
+\examples{
+ myseqs <- read.alignment(file = system.file("sequences/test.mase",
+ package = "seqinr"), format = "mase")
+ dist.alignment(myseqs, matrix = "identity" )
+ as.matrix(dist.alignment(myseqs, matrix = "identity" ))
+}
+\keyword{ manip }
diff --git a/man/dotPlot.Rd b/man/dotPlot.Rd
new file mode 100644
index 0000000..5b5f1d1
--- /dev/null
+++ b/man/dotPlot.Rd
@@ -0,0 +1,88 @@
+\name{dotPlot}
+\alias{dotPlot}
+\title{ Dot Plot Comparison of two sequences }
+\description{
+ Dot plots are most likely the oldest visual representation used to compare
+ two sequences (see Maizel and Lenk 1981 and references therein). In its
+ simplest form, a dot is produced at position (i,j) iff character number
+ i in the first sequence is the same as character number j in the
+ second sequence. More elaborate forms use sliding windows and a threshold
+ value for two windows to be considered as matched.
+}
+\usage{
+dotPlot(seq1, seq2, wsize = 1, wstep = 1, nmatch = 1, col = c("white", "black"),
+xlab = deparse(substitute(seq1)), ylab = deparse(substitute(seq2)), ...)
+}
+\arguments{
+ \item{seq1}{ the first sequence (x-axis) as a vector of single chars.}
+ \item{seq2}{ the second sequence (y-axis) as a vector of single char.}
+ \item{wsize}{ the size in chars of the moving window.}
+ \item{wstep}{ the size in chars for the steps of the moving window.
+ Use \code{wstep == wsize} for non-overlapping windows.}
+ \item{nmatch}{ if the number of matches per window is greater than or equal
+ to \code{nmatch} then a dot is produced.}
+ \item{col}{ color of points passed to \code{image}.}
+ \item{xlab}{ label of x-axis passed to \code{image}.}
+ \item{ylab}{ label of y-axis passed to \code{image}.}
+ \item{...}{ further arguments passed to \code{image}.}
+}
+
+\value{
+ NULL.
+}
+\references{
+Maizel, J.V. and Lenk, R.P. (1981) Enhanced Graphic Matrix Analysis of
+Nucleic Acid and Protein Sequences.
+\emph{Proceedings of the National Academy of Science USA},
+\bold{78}:7665-7669.\cr
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{image}} }
+\examples{
+#
+# Identity is on the main diagonal:
+#
+dotPlot(letters, letters, main = "Direct repeat")
+#
+# Internal repeats are off the main diagonal:
+#
+dotPlot(rep(letters, 2), rep(letters, 2), main = "Internal repeats")
+#
+# Inversions are orthogonal to the main diagonal:
+#
+dotPlot(letters, rev(letters), main = "Inversion")
+#
+# Insertion in the second sequence yields a vertical jump:
+#
+dotPlot(letters, c(letters[1:10], s2c("insertion"), letters[11:26]),
+ main = "Insertion in the second sequence", asp = 1)
+#
+# Insertion in the first sequence yields a horizontal jump:
+#
+dotPlot(c(letters[1:10], s2c("insertion"), letters[11:26]), letters,
+ main = "Insertion in the first sequence", asp = 1)
+#
+# Protein sequences have usually a good signal/noise ratio because there
+# are 20 possible amino-acids:
+#
+aafile <- system.file("sequences/seqAA.fasta", package = "seqinr")
+protein <- read.fasta(aafile)[[1]]
+dotPlot(protein, protein, main = "Dot plot of a protein\nwsize = 1, wstep = 1, nmatch = 1")
+#
+# Nucleic acid sequences have usually a poor signal/noise ratio because
+# there are only 4 different bases:
+#
+dnafile <- system.file("sequences/malM.fasta", package = "seqinr")
+dna <- protein <- read.fasta(dnafile)[[1]]
+dotPlot(dna[1:200], dna[1:200],
+ main = "Dot plot of a nucleic acid sequence\nwsize = 1, wstep = 1, nmatch = 1")
+#
+# Play with the wsize, wstep and nmatch arguments to increase the
+# signal/noise ratio:
+#
+dotPlot(dna[1:200], dna[1:200], wsize = 3, wstep = 3, nmatch = 3,
+main = "Dot plot of a nucleic acid sequence\nwsize = 3, wstep = 3, nmatch = 3")
+}
+\keyword{ utilities }
diff --git a/man/dotchart.uco.Rd b/man/dotchart.uco.Rd
new file mode 100644
index 0000000..64c7ad8
--- /dev/null
+++ b/man/dotchart.uco.Rd
@@ -0,0 +1,60 @@
+\name{dotchart.uco}
+\alias{dotchart.uco}
+\title{Cleveland plot for codon usage tables }
+\description{
+Draw a Cleveland dot plot for codon usage tables
+}
+\usage{
+dotchart.uco(x, numcode = 1, aa3 = TRUE, cex = 0.7, alphabet = s2c("tcag"),
+ pch = 21, gpch = 20, bg = par("bg"), color = par("fg"), gcolor = par("fg"),
+lcolor = "gray", xlim, ...)
+}
+\arguments{
+ \item{x}{table of codon usage as computed by \code{uco}. }
+ \item{numcode}{the number of the code to be used by \code{translate}.}
+ \item{aa3}{logical. If TRUE use the three-letter code for amino-
+acids. If FALSE use the one-letter code for amino-acids. }
+ \item{cex}{the character size to be used. }
+ \item{alphabet}{character for codons labels}
+ \item{pch}{the plotting character or symbol to be used.}
+ \item{gpch}{the plotting character or symbol to be used for group values. }
+ \item{bg}{the background color to be used. }
+ \item{color}{the color(s) to be used for points and labels. }
+ \item{gcolor}{the single color to be used for group labels and values.}
+ \item{lcolor}{the color(s) to be used for the horizontal lines.}
+ \item{xlim}{horizontal range for the plot }
+ \item{\dots}{graphical parameters can also be specified as arguments}
+}
+\value{
+An invisible list with components:
+\item{x}{table of codon usage}
+\item{labels}{codon names}
+\item{groups}{amino acid factor}
+\item{gdata}{sums by amino acid}
+\item{ypg}{the y-axis coordinates for amino acids}
+\item{ypi}{the y-axis coordinates for codons}
+}
+\references{ Cleveland, W. S. (1985) The Elements of Graphing Data.
+Monterey, CA: Wadsworth.
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{dotchart}}, \code{\link{uco}}, \code{\link{aaa}},
+\code{\link{translate}} }
+\examples{
+# Load dataset:
+data(ec999)
+# Compute codon usage for all coding sequences:
+ec999.uco <- lapply(ec999, uco, index="eff")
+# Put it in a dataframe:
+df <- as.data.frame(lapply(ec999.uco, as.vector))
+# Add codon names:
+row.names(df) <- names(ec999.uco[[1]])
+# Compute global codon usage:
+global <- rowSums(df)
+# Choose a title for the graph:
+title <- "Codon usage in 999 E. coli coding sequences"
+# Plot data:
+dotchart.uco(global, main = title)
+}
+\keyword{hplot}
diff --git a/man/draw.oriloc.Rd b/man/draw.oriloc.Rd
new file mode 100644
index 0000000..5835867
--- /dev/null
+++ b/man/draw.oriloc.Rd
@@ -0,0 +1,69 @@
+\name{draw.oriloc}
+\alias{draw.oriloc}
+\title{Graphical representation for nucleotide skews in
+ prokaryotic chromosomes.}
+\description{
+ Graphical representation for nucleotide skews in
+ prokaryotic chromosomes.}
+
+\usage{draw.oriloc(ori, main = "Title",
+ xlab = "Map position in Kb",
+ ylab = "Cumulated combined skew in Kb", las = 1, las.right = 3,
+ ta.mtext = "Cumul. T-A skew", ta.col = "pink", ta.lwd = 1,
+ cg.mtext = "Cumul. C-G skew", cg.col = "lightblue", cg.lwd = 1,
+ cds.mtext = "Cumul. CDS skew", cds.col = "lightgreen", cds.lwd = 1,
+ sk.col = "black", sk.lwd = 2,
+ add.grid = TRUE, ...)}
+
+\arguments{
+
+\item{ori}{A data frame obtained with the \code{oriloc} function.}
+\item{main}{The main title of the plot.}
+\item{xlab}{The x-axis title.}
+\item{ylab}{The y-axis title.}
+\item{las}{The style of axis labels for the bottom and left axes.}
+\item{las.right}{The style of axis labels for the right axis.}
+\item{ta.mtext}{The marginal legend for the TA skew.}
+\item{ta.col}{The color for the TA skew.}
+\item{ta.lwd}{The line width for the TA skew.}
+\item{cg.mtext}{The marginal legend for the CG skew.}
+\item{cg.col}{The color for the CG skew.}
+\item{cg.lwd}{The line width for the CG skew.}
+\item{cds.mtext}{The marginal legend for the CDS skew.}
+\item{cds.col}{The color for the CDS skew.}
+\item{cds.lwd}{The line width for the CDS skew.}
+\item{sk.col}{The color for the cumulated combined skew.}
+\item{sk.lwd}{The line width for the cumulated combined skew.}
+\item{add.grid}{Logical, if \code{TRUE} a vertical grid is added to the plot.}
+\item{...}{Further arguments are passed to the function \code{plot}.}
+}
+
+\references{
+\code{citation("seqinr")}
+}
+
+\author{J.R. Lobry}
+
+\seealso{ \code{\link{oriloc}}, \code{\link{rearranged.oriloc}},
+ \code{\link{extract.breakpoints}} }
+
+\examples{
+\dontrun{ # need internet connection
+#
+# Example with Chlamydia trachomatis complete genome
+#
+ ori <- oriloc()
+ draw.oriloc(ori)
+#
+# The same, using more options from function draw.oriloc()
+#
+draw.oriloc(ori,
+ main = expression(italic(Chlamydia~~trachomatis)~~complete~~genome),
+ ta.mtext = "TA skew", ta.col = "red",
+ cg.mtext = "CG skew", cg.col = "blue",
+ cds.mtext = "CDS skew", cds.col = "seagreen",
+ add.grid = FALSE)
+}
+}
+
+\keyword{hplot}
diff --git a/man/draw.rearranged.oriloc.Rd b/man/draw.rearranged.oriloc.Rd
new file mode 100644
index 0000000..e169f02
--- /dev/null
+++ b/man/draw.rearranged.oriloc.Rd
@@ -0,0 +1,66 @@
+\name{draw.rearranged.oriloc}
+\alias{draw.rearranged.oriloc}
+\title{Graphical representation for rearranged nucleotide skews in
+ prokaryotic chromosomes.}
+\description{
+ Graphical representation for rearranged nucleotide skews in
+ prokaryotic chromosomes.}
+
+\usage{draw.rearranged.oriloc(rearr.ori, breaks.gcfw = NA,
+ breaks.gcrev = NA, breaks.atfw = NA, breaks.atrev = NA)}
+
+\arguments{
+
+\item{rearr.ori}{A data frame obtained with the \code{rearranged.oriloc}
+ function. }
+
+\item{breaks.gcfw}{The coordinates of the breakpoints in the GC-skew,
+ for forward transcribed protein coding sequences. These coordinates
+ can be obtained with the \code{extract.breakpoints} function. }
+\item{breaks.gcrev}{The coordinates of the breakpoints in the GC-skew,
+ for reverse transcribed protein coding sequences. These coordinates
+ can be obtained with the \code{extract.breakpoints} function. }
+\item{breaks.atfw}{The coordinates of the breakpoints in the AT-skew,
+ for forward transcribed protein coding sequences. These coordinates
+ can be obtained with the \code{extract.breakpoints} function. }
+\item{breaks.atrev}{The coordinates of the breakpoints in the AT-skew,
+ for reverse transcribed protein coding sequences. These coordinates
+ can be obtained with the \code{extract.breakpoints} function. }
+
+}
+
+\references{
+ Necşulea, A. and Lobry, J.R. (2007) A New Method for Assessing the
+ Effect of Replication on DNA Base Composition Asymmetry.
+ \emph{Molecular Biology and Evolution}, \bold{24}:2169-2179.
+}
+\author{J.R. Lobry, A. Necşulea}
+\seealso{ \code{\link{rearranged.oriloc}},
+ \code{\link{extract.breakpoints}} }
+
+\examples{
+
+\dontrun{
+### Example for Chlamydia trachomatis ####
+
+### Rearrange the chromosome and compute the nucleotide skews ###
+
+#r.ori <- rearranged.oriloc(seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+# g2.coord = system.file("sequences/ct.coord", package = "seqinr"))
+
+r.ori <- rearranged.oriloc(seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ g2.coord = system.file("sequences/ct.coord", package = "seqinr"))
+
+
+
+### Extract the breakpoints for the rearranged nucleotide skews ###
+
+breaks <- extract.breakpoints(r.ori, type = c("gcfw", "gcrev"),
+ nbreaks = c(2, 2), gridsize = 50, it.max = 100)
+
+### Draw the rearranged nucleotide skews and ###
+### place the position of the breakpoints on the graphics ###
+
+draw.rearranged.oriloc(r.ori, breaks.gcfw = breaks$gcfw$breaks,
+ breaks.gcrev = breaks$gcrev$breaks)}
+}
diff --git a/man/draw.recstat.Rd b/man/draw.recstat.Rd
new file mode 100755
index 0000000..72626c9
--- /dev/null
+++ b/man/draw.recstat.Rd
@@ -0,0 +1,34 @@
+\name{draw.recstat}
+\alias{draw.recstat}
+\title{Graphical representation of a recstat analysis.}
+\description{This function displays the results returned by \code{recstat} with two plots. The
+first one shows the factor scores of a CA computed on the codon composition of a DNA sequence.
+The second one shows the locations of all Start and Stop codons in this sequence.}
+\usage{draw.recstat(rec, fac = 1, direct = TRUE, xlim = c(1, seqsize),
+ col = c("red", "blue", "purple"))}
+\arguments{
+ \item{rec}{list of elements returned by \code{recstat} function.}
+ \item{fac}{axis of the CA to use for display (4 \eqn{\ge} \code{fac}
+ \eqn{\ge} 1).}
+ \item{direct}{a logical for the choice of direct or reverse strand.}
+ \item{xlim}{starting and ending positions in the sequence for the plot.}
+ \item{col}{vector of colour codes for the three frames of the sequence.}
+}
+\details{The first plot shows the factor scores of the sliding windows, this for the three
+possible frames of the strand selected by the user. The second shows the Start (filled grey
+triangles pointing up) and Stop (solid black triangles pointing down) codon positions. Note
+that the standard genetic code is used for that purpose. Visual detection of putative CDS is
+performed through the simultaneous use of these two graphics. If a CDS is located within the
+sequence, the factor scores for the windows located in the corresponding reading frame will be
+significantly separated from the two others. Moreover, the region where this separation is seen
+should be located between a Start and a Stop codon.}
+\author{O. Clerc, G. Perrière}
+\seealso{\code{\link{test.li.recstat}}, \code{\link{test.co.recstat}}}
+\examples{
+ff <- system.file("sequences/ECOUNC.fsa", package = "seqinr")
+seq <- read.fasta(ff)
+rec <- recstat(seq[[1]], seqname = getName(seq))
+draw.recstat(rec)
+}
+\keyword{correspondence analysis}
+\keyword{sequence}
\ No newline at end of file
diff --git a/man/ec999.Rd b/man/ec999.Rd
new file mode 100644
index 0000000..32d08f8
--- /dev/null
+++ b/man/ec999.Rd
@@ -0,0 +1,36 @@
+\name{ec999}
+\alias{ec999}
+\docType{data}
+\title{ 999 coding sequences from E. coli }
+\description{
+This dataset contains 999 coding sequences from the Escherichia
+coli chromosome}
+\usage{data(ec999)}
+\format{ List of 999 vectors of characters, one for each coding
+sequence.
+\describe{
+ \item{ECFOLE.FOLE }{chr [1:672] "A" "T" "G" "C" ...}
+ \item{ECMSBAG.MSBA }{chr [1:1749] "A" "T" "G" "C" ...}
+ \item{ECNARZYW-C.NARV}{chr [1:681] "A" "T" "G" "A" ...}
+ \item{... }{ ... TRUNCATED ... }
+ \item{XYLEECOM.MALK }{chr [1:1116] "A" "T" "G" "G" ...}
+ \item{XYLEECOM.LAMB }{chr [1:1341] "A" "T" "G" "A" ...}
+ \item{XYLEECOM.MALM }{chr [1:921] "A" "T" "G" "A" ...}
+}
+}
+\references{
+Lobry, J.R., Gautier, C. (1994) Hydrophobicity,
+expressivity and aromaticity are the major trends of amino-acid usage in
+999 \emph{Escherichia coli} chromosome-encoded genes. \emph{Nucleic Acids
+ Research},\bold{22}:3174-3180.
+
+\code{citation("seqinr")}
+}
+\examples{
+data(ec999)
+#
+# How to export sequences in a FASTA file:
+#
+write.fasta(ec999, names(ec999), file = "ec999.ffn")
+}
+\keyword{datasets}
diff --git a/man/extract.breakpoints.Rd b/man/extract.breakpoints.Rd
new file mode 100644
index 0000000..3a57201
--- /dev/null
+++ b/man/extract.breakpoints.Rd
@@ -0,0 +1,91 @@
+\name{extract.breakpoints}
+\alias{extract.breakpoints}
+\title{Extraction of breakpoint positions on the
+ rearranged nucleotide skews. }
+\description{
+ Extraction of breakpoint positions on the
+ rearranged nucleotide skews.
+ }
+
+\usage{extract.breakpoints(rearr.ori,
+type = c("atfw", "atrev", "gcfw", "gcrev"),
+ nbreaks, gridsize = 100, it.max = 500)}
+
+\arguments{
+\item{rearr.ori}{A data frame obtained with the \code{rearranged.oriloc}
+ function. }
+\item{type}{The type of skew for which to extract the breakpoints; must
+ be a subset of \code{c("atfw","atrev","gcfw","gcrev")}.}
+\item{nbreaks}{The number of breakpoints to extract for each type of
+ skew. Provide a vector of the same length as \code{type}.}
+\item{gridsize}{To make sure that the best breakpoints are found, and to
+avoid finding only a local extremum of the likelihood and residual sum
+of square functions, a grid search is performed. The search for
+breakpoints is repeated \code{gridsize} times, with different starting
+values for the breakpoints. }
+\item{it.max}{The maximum number of iterations to be performed when
+ searching for the breakpoints. This argument corresponds to the
+ \code{it.max} argument in \code{segmented}.}
+}
+\details{
+ This method uses the \code{segmented} function in the \code{segmented}
+ package to extract the breakpoint positions in the rearranged
+ nucleotide skews obtained with the \code{rearranged.oriloc} function.
+To make sure that the best breakpoints are found, and to
+avoid finding only a local extremum of the likelihood and residual sum
+of square functions, a grid search is performed. The search for
+breakpoints is repeated \code{gridsize} times, with different starting
+values for the breakpoints.
+
+}
+
+\value{
+
+ This function returns a list, with as many elements as the \code{type}
+ argument (for example \code{$gcfw} will contain the results for the
+ rearranged GC-skew, for forward-encoded genes). Each element of this list is also a list, containing the
+ following information: in \code{$breaks} the position of the breakpoints on the
+ rearranged chromosome; in \code{$slopes.left} the slopes of the
+ segments on the left side of each breakpoint; in \code{$slopes.right} the slopes of the
+ segments on the right side of each breakpoint; in \code{$real.coord},
+ the coordinates of the breakpoints on the real chromosome (before rearrangement).
+
+ }
+
+\references{
+
+ \code{citation("segmented")}
+
+ Necşulea, A. and Lobry, J.R. (2007) A New Method for Assessing the
+ Effect of Replication on DNA Base Composition Asymmetry.
+ \emph{Molecular Biology and Evolution}, \bold{24}:2169-2179.
+
+}
+\author{A. Necşulea}
+\seealso{ \code{\link{oriloc}}, \code{\link{draw.rearranged.oriloc}},
+ \code{\link{rearranged.oriloc}} }
+
+\examples{
+
+### Example for Chlamydia trachomatis ####
+
+### Rearrange the chromosome and compute the nucleotide skews ###
+
+\dontrun{r.ori <- rearranged.oriloc(seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ g2.coord = system.file("sequences/ct.coord",package = "seqinr"))}
+
+### Extract the breakpoints for the rearranged nucleotide skews ###
+
+
+\dontrun{breaks <- extract.breakpoints(r.ori,type = c("gcfw", "gcrev"),
+ nbreaks = c(2, 2), gridsize = 50, it.max = 100)}
+
+
+### Draw the rearranged nucleotide skews and ###
+### place the position of the breakpoints on the graphics ###
+
+\dontrun{draw.rearranged.oriloc(r.ori, breaks.gcfw = breaks$gcfw$breaks,
+ breaks.gcrev = breaks$gcrev$breaks)}
+}
+
+\keyword{utilities}
diff --git a/man/extractseqs.Rd b/man/extractseqs.Rd
new file mode 100644
index 0000000..f3eea31
--- /dev/null
+++ b/man/extractseqs.Rd
@@ -0,0 +1,89 @@
+\name{extractseqs}
+\alias{extractseqs}
+\alias{exseq}
+\title{To extract the sequence information of a sequence or a list of sequences in different formats}
+\description{
+ The function allows extracting large amounts of data, such as whole genome sequences, using different output formats and types of extraction.
+ This function is not yet available for windows in zlib mode.
+}
+
+\usage{
+extractseqs(listname,socket = autosocket(), format="fasta",
+operation="simple",feature="xx", bounds="xx", minbounds="xx",
+ verbose = FALSE, nzlines=1000, zlib = FALSE)
+exseq(listname,socket = autosocket(),
+ format="fasta",operation="simple", feature="xx",
+ bounds="xx", minbounds="xx", verbose = FALSE, nzlines=1000, zlib = FALSE)
+}
+
+\arguments{
+ \item{listname}{the name of list on server (may be a virtual list) }
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{format}{the format of output. Can be \code{acnuc}, \code{fasta}, \code{flat} or \code{coordinates} }
+ \item{operation}{the type of extraction. Can be \code{simple}, \code{translate}, \code{fragment}, \code{feature} or \code{region} }
+ \item{feature}{-optional- the feature to be extracted (for operations "feature" or "region"): a feature table item (CDS, mRNA,...)}
+ \item{bounds}{-optional- the bounds for extraction (for operations "fragment" or "region") }
+ \item{minbounds}{-optional- the minimal bounds for extraction (for operations "fragment" or "region") }
+ \item{verbose}{if \code{TRUE}, verbose mode is on}
+ \item{nzlines}{number of lines in zlib mode}
+ \item{zlib}{logical. If TRUE sequences are downloaded in zlib compressed mode.}
+}
+
+\details{
+ To extract a list of sequences (lrank argument) or a single sequence (seqnum argument)
+using different output formats and types of extraction.
+All formats except "coordinates" extract sequence data.
+Format "coordinates" extracts coordinate data; start > end indicates the complementary strand.
+\describe{
+ \item{\strong{listname}}{sequence list name.}
+ \item{\strong{socket}}{a socket of class connection and sockconn returned by \code{choosebank}.
+ Default value (auto) means that the socket will be set to the socket component of the banknameSocket variable. }
+ \item{\strong{format}}{\code{acnuc}, \code{fasta}, \code{flat} or \code{coordinates}}
+ \item{\strong{operation}}{\code{simple}, \code{translate}, \code{fragment}, \code{feature} or \code{region}}
+ \item{\strong{feature}}{(for operations "feature" or "region") a feature table item (CDS, mRNA,...).
+ \describe{
+ \item{\emph{simple}}{each sequence or subsequence is extracted.}
+ \item{\emph{translate}}{meaningful only for protein-coding (sub)sequences that are extracted as protein sequences. Nothing is extracted for non-protein coding sequences.}
+ \item{\emph{fragment}}{Allows to extract any part of the sequence(s) in list.
+ Such part is specified by the bounds and minbounds arguments according to the syntax suggested by these examples:
+ \tabular{lll}{
+ \tab 132,1600 \tab to extract from nucl. 132 to nucl 1600 of the sequence. If applied to a subsequence, coordinates are in the parent seq relatively to the subsequence start point.\cr
+ \tab -10,10 \tab to extract from 10 nucl. BEFORE the 5' end of the sequence to nucl. 10 of it. Useful only for subsequences, and produces a fragment extracted from its parent sequence.\cr
+ \tab e-20,e+10 \tab to extract from 20 nucl. BEFORE the 3' end of the sequence to 10 nucl. AFTER its 3' end. Useful only for subsequences, and produces a fragment extracted from its parent sequence.\cr
+ \tab -20,e+5 \tab to extract from 20 nucl. BEFORE the 5' end of the sequence to 5 nucl. AFTER its 3' end.
+ }
+ }
+ }}
+ \item{\strong{bounds}}{(for operations "fragment" or "region") see syntax above.}
+ \item{\strong{minbounds}}{same syntax as bounds. When the sequence data is too short for this quantity
+ to be extracted, nothing is extracted. When the sequence data is between minbounds and bounds,
+ extracted sequence data is extended by N's to the desired length.}
+
+}
+
+}
+
+\value{
+ Sequence data.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{S. Penel}
+\seealso{
+ \code{\link{choosebank}},
+ \code{\link{query}},
+ \code{\link{getlistrank}}
+}
+\examples{
+ \dontrun{# Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "k=globin", virtual = TRUE)
+ mylist.fasta <- exseq("mylist", verbose = TRUE)
+ # 103 lines of FASTA
+ stopifnot(length(mylist.fasta) == 103)
+ closebank()
+ }
+}
+\keyword{utilities}
diff --git a/man/fasta.Rd b/man/fasta.Rd
new file mode 100644
index 0000000..d1fc0e9
--- /dev/null
+++ b/man/fasta.Rd
@@ -0,0 +1,10 @@
+\name{fasta}
+ \docType{data}
+ \alias{fasta}
+ \title{Example of results obtained after a call to read.alignment}
+ \description{This data set gives an example of an amino acid alignment obtained after a call to the function read.alignment on an alignment file in "fasta" format.}
+ \usage{fasta}
+ \format{A List of class alignment}
+ \source{https://pbil.univ-lyon1.fr/help/formats.html/}
+ \references{Pearson W.R. and Lipman D.J. (1988) \emph{Improved tools for biological sequence comparison}. Proc Natl Acad Sci U S A. 85(8):2444-8.}
+ \keyword{datasets}
diff --git a/man/fastacc.Rd b/man/fastacc.Rd
new file mode 100644
index 0000000..ffd62b7
--- /dev/null
+++ b/man/fastacc.Rd
@@ -0,0 +1,196 @@
+\name{fastacc}
+\alias{fastacc}
+\title{Fast Allele in Common Count}
+\description{
+The purpose of this function is to compute as fast as possible the number
+of alleles in common between a target (typically the genetic profile observed at a
+crime scene, possibly a mixture with dropouts) and a database reference (typically
+the genetic profiles of individuals). Both are assumed to be pre-encoded at the bit
+level in a consistent way.
+}
+\usage{
+fastacc(target, database)
+}
+\arguments{
+ \item{target}{the \code{\link{raw}} encoding of the target, typically 40 octets for a core-CODIS profile in 2009}
+ \item{database}{the \code{\link{raw}} encoding of the database. If there are n entries
+in the database, then the database must be n times longer than the target.}
+}
+\details{
+This function is at the RFC (request for comments) stage. Comments are welcome.
+
+Genetic profiles are encoded at the bit level. One bit represents one allele.
+Count is based on a logical AND at bit level. Bit count is encoded at C level
+using the precomputed approach: one indirection with an auxiliary table
+of size 256 called \code{bits_in_char} which is pre-computed at R level and
+passed at C level.
+}
+\value{
+A vector of \code{\link{integer}} giving for each entry in the database how many
+alleles are in common between the entry and the target.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+
+\section{Warning }{Experimental, first release scheduled for seqinr 2.0-6 by the end of 2009}
+
+\seealso{
+FIXME
+}
+\examples{
+#
+# NOTE:
+#
+# This example section is proof-of-concept material. Most code should be
+# embedded in documented functions to avoid verbosity. But at the RFC stage
+# it is perhaps not too bad an idea to show how powerful R is.
+#
+
+#
+# Let's start from the 16 loci available in the AmpFLSTR kit:
+#
+
+path <- system.file("abif/AmpFLSTR_Bins_v1.txt", package = "seqinr")
+resbin <- readBins(path)
+codis <- resbin[["Identifiler_CODIS_v1"]]
+names(codis)
+
+#
+# We count how many different alleles are present per locus:
+#
+
+na <- unlist(lapply(codis, function(x) length(x[[1]])))
+na
+
+#
+# The number of octets required to encode a genetic profile for each locus is then:
+#
+
+ceiling(na/8)
+
+#
+# We need then a total of 40 octets to code these profiles:
+#
+
+sum(ceiling(na/8))
+
+#
+# Let's define a function to encode a profile at a given locus, and vice versa:
+#
+
+prof2raw <- function(profile, alleles) {
+ if (!is.ordered(alleles)) stop("ordered factor expected for alleles")
+ if (!is.character(profile)) stop("vector of character expected for profile")
+ noctets <- ceiling(length(alleles)/8)
+ res.b <- rawToBits(raw(noctets))
+ for (i in 1:length(profile)) {
+ res.b[which(profile[i] == alleles)] <- as.raw(1)
+ }
+ return(packBits(res.b, type = "raw"))
+}
+
+raw2prof <- function(rawdata, alleles) {
+ if (!is.ordered(alleles)) stop("ordered factor expected for alleles")
+ if (!is.raw(rawdata)) stop("vector of raw expected for rawdata")
+ res <- as.character(alleles)[as.logical(rawToBits(rawdata))]
+ return(paste(res, collapse = ", "))
+}
+
+#
+# Let's now code all alleles present in codis as ordered factors:
+#
+
+allalleles <- lapply(codis, function(x) factor(x[, 1], levels = x[, 1], ordered = TRUE))
+
+#
+# Let's play with our encoding/decoding utilities with first locus:
+#
+
+allalleles[[1]] # <8 8 9 10 11 12 13 14 15 16 17 18 19 >19
+res <- prof2raw(c("8", "9", "13", "14", ">19"), allalleles[[1]])
+res # c6 20
+rawToBits(res) # 00 01 01 00 00 00 01 01 00 00 00 00 00 01 00 00
+raw2prof(res, allalleles[[1]]) # "8, 9, 13, 14, >19"
+
+#
+# Let's define a profile with all possible alleles:
+#
+
+ladder <- unlist(lapply(allalleles, function(x) prof2raw(as.character(x),x)))
+names(ladder) <- NULL
+stopifnot(identical(as.integer(ladder),
+ c(255L, 63L, 255L, 255L, 255L, 63L, 255L, 63L, 255L, 31L, 255L,
+ 63L, 255L, 255L, 7L, 255L, 3L, 255L, 63L, 255L, 255L, 255L, 255L,
+ 15L, 255L, 127L, 255L, 3L, 255L, 255L, 255L, 255L, 3L, 3L, 255L,
+ 15L, 255L, 255L, 255L, 7L))) # simple sanity check
+
+#
+# Let's make a simulated database. Here we use a random sampling
+# with a uniform distribution between all possible profiles
+# at a given locus. A more realistic sampling for an individual database
+# would be to sample only two alleles at each locus according to
+# observed frequencies in populations.
+#
+
+n <- 10^5 # the number of records in the database
+DB <- sapply(ladder, function(x) as.raw(sample(0:as.integer(x), size = n, replace = TRUE)))
+
+#
+# Now we make sure that the target is in the database:
+#
+
+target <- DB[666, ]
+DB <- as.vector(t(DB)) # put DB as a flat database (is it useful?)
+
+#
+# Now we compute the number of alleles in common between the
+# target and all the entries in the DB:
+#
+
+system.time(res <- fastacc(target,DB)) # Fast, isn't it ?
+stopifnot(which.max(res) == 666) # sanity check
+
+#
+# Don't run: too tedious for routine check. We check here that complexity is
+# linear in time up to a database size of 10^7 entries (roughly the number of
+# individual profiles at the EU level)
+#
+
+\dontrun{
+maxn <- 10^7
+DB <- sapply(ladder, function(x) as.raw(sample(0:as.integer(x),
+ size = maxn, replace = T)))
+target <- DB[666, ]
+DB <- as.vector(t(DB))
+
+np <- 10
+nseq <- seq(from = 10^5, to = maxn, length = np)
+res <- numeric(np)
+i <- 1
+for (n in nseq) {
+ print(i)
+ res[i] <- system.time(tmp <- fastacc(target, DB[1:n]))[1]
+ stopifnot(which.max(tmp) == 666)
+ i <- i + 1
+}
+dbse <- data.frame(list(nseq = nseq, res = res))
+
+x <- dbse$nseq
+y <- dbse$res
+plot(x, y, type = "b", xlab = "Number of entries in DB", ylab = "One query time [s]",
+las = 1, xlim = c(0, maxn), ylim = c(0, max(y)), main = "Data base size effect on query time")
+lm1 <- lm(y ~ x - 1)
+abline(lm1, col = "red")
+legend("topleft", inset = 0.01, legend = paste("y =", formatC(lm1$coef[1],
+digits = 3), "x"), col = "red", lty = 1)
+
+#
+# On my laptop the slope is 2.51e-08, that is about 1/4 of a second to scan a
+# database with 10^7 entries.
+#
+}
+
+## end
+}
diff --git a/man/figures/aka.pdf b/man/figures/aka.pdf
new file mode 100644
index 0000000..bc174eb
Binary files /dev/null and b/man/figures/aka.pdf differ
diff --git a/man/figures/chargaff.png b/man/figures/chargaff.png
new file mode 100644
index 0000000..ce8eb06
Binary files /dev/null and b/man/figures/chargaff.png differ
diff --git a/man/figures/gcskewmbe96.pdf b/man/figures/gcskewmbe96.pdf
new file mode 100644
index 0000000..1567c12
Binary files /dev/null and b/man/figures/gcskewmbe96.pdf differ
diff --git a/man/figures/introduction-dbg.pdf b/man/figures/introduction-dbg.pdf
new file mode 100644
index 0000000..bc0219b
Binary files /dev/null and b/man/figures/introduction-dbg.pdf differ
diff --git a/man/figures/lncs2004.pdf b/man/figures/lncs2004.pdf
new file mode 100644
index 0000000..f877bb2
Binary files /dev/null and b/man/figures/lncs2004.pdf differ
diff --git a/man/figures/waterabs.jpg b/man/figures/waterabs.jpg
new file mode 100644
index 0000000..5fa76ba
Binary files /dev/null and b/man/figures/waterabs.jpg differ
diff --git a/man/gb2fasta.Rd b/man/gb2fasta.Rd
new file mode 100644
index 0000000..17872b7
--- /dev/null
+++ b/man/gb2fasta.Rd
@@ -0,0 +1,40 @@
+\name{gb2fasta}
+\alias{gb2fasta}
+\title{Conversion of GenBank file into fasta file}
+\description{
+Converts a single entry in GenBank format into a fasta file.
+}
+\usage{
+gb2fasta(source.file, destination.file)
+}
+\arguments{
+ \item{source.file}{ GenBank file }
+ \item{destination.file}{ Fasta file }
+}
+\details{
+Multiple entries in GenBank file are not supported.
+}
+\value{
+ none
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{oriloc}} }
+\examples{
+ myGenBankFile <- system.file("sequences/ct.gbk.gz", package = "seqinr")
+ myFastaFileName <- "Acinetobacter_ADP1_uid61597.fasta"
+ gb2fasta(myGenBankFile, myFastaFileName)
+ readLines(myFastaFileName)[1:5]
+ #
+ # Should be :
+ #
+ # [1] ">CHLTCG 1042519 bp"
+ # [2] "gcggccgcccgggaaattgctaaaagatgggagcaaagagttagagatctacaagataaa"
+ # [3] "ggtgctgcacgaaaattattaaatgatcctttaggccgacgaacacctaattatcagagc"
+ # [4] "aaaaatccaggtgagtatactgtagggaattccatgttttacgatggtcctcaggtagcg"
+ # [5] "aatctccagaacgtcgacactggtttttggctggacatgagcaatctctcagacgttgta"
+ #
+}
+\keyword{utilities}
diff --git a/man/gbk2g2.Rd b/man/gbk2g2.Rd
new file mode 100644
index 0000000..01ebc27
--- /dev/null
+++ b/man/gbk2g2.Rd
@@ -0,0 +1,38 @@
+\name{gbk2g2}
+\alias{gbk2g2}
+\title{ Conversion of a GenBank format file into a glimmer-like one }
+\description{
+This function reads a file in GenBank format and converts the features
+corresponding to CDS (Coding Sequences) into a format similar to
+glimmer program output.
+}
+\usage{
+gbk2g2(gbkfile = "ftp://pbil.univ-lyon1.fr/pub/seqinr/data/ct.gbk",
+g2.coord = "g2.coord")
+}
+
+\arguments{
+ \item{gbkfile}{ The name of the GenBank file }
+ \item{g2.coord}{ The name of the output file in glimmer-like format }
+}
+\details{
+Partial CDS (either 5' or 3') and join in features are discarded.
+}
+\value{
+The input file is returned invisibly.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{oriloc}} which uses glimmer-like files,
+ \code{\link{gbk2g2.euk}} for eukaryotic sequences with introns.}
+\examples{
+ \dontrun{ # need internet connection
+ suppressWarnings(gbk2g2(g2.coord = "gbk2g2.test"))
+ res <- read.table("gbk2g2.test")
+ head(res)
+ stopifnot(nrow(res) == 892)
+ }
+}
+\keyword{utilities}
diff --git a/man/gbk2g2.euk.Rd b/man/gbk2g2.euk.Rd
new file mode 100644
index 0000000..e739fa8
--- /dev/null
+++ b/man/gbk2g2.euk.Rd
@@ -0,0 +1,38 @@
+\name{gbk2g2.euk}
+\alias{gbk2g2.euk}
+\title{ Conversion of a GenBank format file into a glimmer-like
+ one. Eukaryotic version.}
+\description{
+This function reads a file in GenBank format and converts the features
+corresponding to CDS (Coding Sequences) into a format similar to
+glimmer program output. This function is specifically made for
+eukaryotic sequences, i.e. with introns.
+}
+\usage{
+gbk2g2.euk(gbkfile = system.file("sequences/ame1.gbk", package ="seqinr"),
+g2.coord = "g2.coord")
+}
+
+\arguments{
+ \item{gbkfile}{ The name of the GenBank file }
+ \item{g2.coord}{ The name of the output file }
+}
+\details{
+This function returns the coordinates of the exons annotated in the
+GenBank format file.
+}
+\value{
+A data frame with three columns will be written to the \code{g2.coord}
+file. The first column corresponds to the name of the gene, given in the
+GenBank file through the \code{/gene} feature. The second and third
+column contain the start and the stop position of the exon.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry, A. Necşulea}
+\seealso{ \code{\link{oriloc}}, \code{\link{gbk2g2}} }
+\examples{
+ \dontrun{ gbk2g2.euk() }
+}
+\keyword{utilities}
diff --git a/man/gcO2.Rd b/man/gcO2.Rd
new file mode 100644
index 0000000..9bb073f
--- /dev/null
+++ b/man/gcO2.Rd
@@ -0,0 +1,21 @@
+\name{gcO2}
+\alias{gcO2}
+\docType{data}
+\title{GC content and aerobiosis in bacteria}
+\description{
+ This data set was used in Naya \emph{et al.} (2002) to study the relationship between the genomic G+C content of bacteria and whether they are (strictly) aerobes or anaerobes.
+}
+\format{
+\code{gcO2} is a data frame.
+}
+\source{
+Naya, H., Romero, H., Zavala, A., Alvarez, B. and Musto, H. (2002) Aerobiosis increases the Genomic Guanine Plus Cytosine Content (GC\%) in prokaryotes. \emph{Journal of Molecular Evolution}, \bold{55}:260-264.
+
+Data imported into seqinr by J.R. Lobry on 09-OCT-2016. Original source location given in the article was \code{http://oeg.fcien.edu.uy/GCprok/} but is no longer active. Data were copied at \url{http://pbil.univ-lyon1.fr/R/donnees/gcO2.txt} (\emph{cf.} section 2.1 in Lobry, J.R. (2004) Life history traits and genome structure: aerobiosis and G+C content in bacteria. \emph{Lecture Notes in Computer Science}, \bold{3039}:679-686). Import was from this last resource.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\examples{
+data(gcO2)
+}
diff --git a/man/gcT.Rd b/man/gcT.Rd
new file mode 100644
index 0000000..c1f0beb
--- /dev/null
+++ b/man/gcT.Rd
@@ -0,0 +1,34 @@
+\name{gcT}
+\alias{gcT}
+\docType{data}
+\title{GC content and temperature in bacteria}
+\description{
+ This data set was used in Galtier and Lobry (1997) to study the relationship between the optimal growth temperature of bacteria and their G+C content at the genomic level and locally where selection is active to maintain secondary structures in the stems of RNAs.
+}
+\format{
+ \code{gcT} is a list containing the 9 following components:
+ \describe{
+ \item{species}{is a data frame containing the optimal growth temperature and genomic G+C content for 772 bacterial species. Detailed explanations for this table and the following are available in the \code{README} component.}
+ \item{genus}{is a data frame containing the optimal growth temperature and genomic G+C content for 224 bacterial genera.}
+ \item{details}{is a data frame with more information, see \code{README}.}
+ \item{gc16S}{is a data frame containing the optimal growth temperature and stems G+C content for 16S RNA from 165 bacterial genera.}
+ \item{gctRNA}{is a data frame containing the optimal growth temperature and stems G+C content for tRNA from 51 bacterial genera.}
+ \item{gc23S}{is a data frame containing the optimal growth temperature and stems G+C content for 23S RNA from 38 bacterial genera.}
+ \item{gc5S}{is a data frame containing the optimal growth temperature and stems G+C content for 5S RNA from 71 bacterial genera.}
+ \item{README}{is the original README file from \code{ftp://biom3.univ-lyon1.fr/pub/datasets/JME97/} last updated 13-MAY-2002.}
+ \item{importgcT}{is the R script used to import data.}
+ }
+}
+\source{
+Galtier, N. & Lobry, J.R. (1997). Relationships between genomic G+C
+content, RNA secondary structures, and optimal growth temperature
+in prokaryotes. \emph{Journal of Molecular Evolution} \bold{44}:632-636.\cr
+
+Data imported into seqinr with the R script given in the last component of the dataset by J.R. Lobry on 09-OCT-2016.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\examples{
+data(gcT)
+}
diff --git a/man/get.db.growth.Rd b/man/get.db.growth.Rd
new file mode 100755
index 0000000..e5c3917
--- /dev/null
+++ b/man/get.db.growth.Rd
@@ -0,0 +1,61 @@
+\name{get.db.growth}
+\alias{get.db.growth}
+\alias{dia.db.growth}
+\title{ Get the exponential growth of nucleic acid database content }
+\description{
+ Connects to the embl database to read the last release note about
+the number of nucleotides in the DDBJ/EMBL/Genbank database content.
+A log-linear fit is represented by dia.db.growth() with an estimate of
+the doubling time in months.
+}
+\usage{
+get.db.growth(
+where = "ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/relnotes.txt")
+dia.db.growth( get.db.growth.out = get.db.growth(), Moore = TRUE, ... )
+}
+\arguments{
+ \item{where}{ the file containing the database growth table.}
+ \item{get.db.growth.out}{ the output from get.db.growth() }
+ \item{Moore}{ logical, if TRUE add lines corresponding to an
+exponential growth rate with a doubling time of 18 months, that
+is Moore's law.}
+ \item{...}{further arguments to plot}
+}
+\value{
+ A dataframe with the statistics from the embl site.
+}
+
+\details{
+This is a screenshot from fig. 1 in Lobry (2004):
+
+\if{html}{\figure{lncs2004.pdf}{options: width=400}}
+\if{latex}{\figure{lncs2004.pdf}{options: width=12cm}}
+
+At that time the doubling time was 16.9 months. This is an
+update in 2016 from release 3.1-5 of the seqinr tutorial
+\url{http://seqinr.r-forge.r-project.org/seqinr_3_1-5.pdf}:
+
+\if{html}{\figure{introduction-dbg.pdf}{options: width=400}}
+\if{latex}{\figure{introduction-dbg.pdf}{options: width=12cm}}
+
+The doubling time was 18.8 months in this update. The fit to
+Moore's law is still striking over such a long period.
+
+}
+\references{
+\url{http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.txt}
+
+Lobry, J.R. (2004) Life History Traits and Genome Structure:
+Aerobiosis and G+C Content in Bacteria. \emph{Lecture Notes in Computer Science},
+\bold{3039}:679-686.
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\examples{
+\dontrun{
+ ### Need internet connection
+ data <- get.db.growth()
+ dia.db.growth(data)
+}}
+\keyword{utilities}
diff --git a/man/get.ncbi.Rd b/man/get.ncbi.Rd
new file mode 100755
index 0000000..d9cfc37
--- /dev/null
+++ b/man/get.ncbi.Rd
@@ -0,0 +1,41 @@
+\name{get.ncbi}
+\alias{get.ncbi}
+\alias{ncbi.fna.url}
+\alias{ncbi.ptt.url}
+\alias{ncbi.stats}
+\alias{ncbi.gbk.url}
+\title{ Bacterial complete genome data from ncbi ftp site }
+\description{
+Try to connect to ncbi ftp site to get a list of complete bacterial genomes.
+}
+\usage{
+get.ncbi(repository = "ftp://ftp.ncbi.nih.gov/genomes/Bacteria/")
+}
+\arguments{
+ \item{repository}{ Where to look for data. The default value is the location of the complete bacterial genome sequences at ncbi ftp repository. }
+}
+\value{
+Returns a data frame which contains the following columns:
+ \item{species}{The species name as given by the corresponding folder
+ name in the repository (\emph{e.g.} Yersinia\_pestis\_KIM).}
+ \item{accession}{The accession number as given by the common prefix
+ of file names in the repository (\emph{e.g.} NC\_004088).}
+ \item{size.bp}{The size of the sequence in bp (\emph{e.g.} 4600755).}
+ \item{type}{A factor with two levels (plasmid or chromosome) tentatively
+ deduced from the description of the sequence.}
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\section{WARNING }{
+This function is highly dependent on ncbi ftp site conventions
+over which we have no control.
+The ftp connection apparently does not work when there is a proxy,
+this problem is circumvented here in a rather crude way.
+}
+\examples{
+\dontrun{bacteria <- get.ncbi()}
+\dontrun{summary(bacteria)}
+}
+\keyword{utilities}
diff --git a/man/getAnnot.Rd b/man/getAnnot.Rd
new file mode 100644
index 0000000..b9e8aa7
--- /dev/null
+++ b/man/getAnnot.Rd
@@ -0,0 +1,88 @@
+\name{getAnnot}
+\alias{getAnnot}
+\alias{getAnnot.default}
+\alias{getAnnot.list}
+\alias{getAnnot.SeqAcnucWeb}
+\alias{getAnnot.SeqFastadna}
+\alias{getAnnot.SeqFastaAA}
+\alias{getAnnot.qaw}
+\alias{getAnnot.logical}
+\alias{readAnnots.socket}
+
+\title{Generic Function to get sequence annotations}
+\description{
+Annotations are taken from the \code{Annot} attribute for sequences
+imported from a FASTA file and retrieved from an ACNUC server for
+objects of the \code{SeqAcnucWeb} class.
+}
+\usage{
+getAnnot(object, ...)
+\method{getAnnot}{SeqAcnucWeb}(object, ..., nbl = 100, socket = autosocket())
+}
+\arguments{
+ \item{object}{ an object of the class \code{SeqAcnucWeb}
+ or \code{SeqFastadna}, or \code{SeqFastaAA} or a list of these objects }
+ \item{nbl}{ the maximum number of lines of annotation to read. Reading of
+ lines stops when nbl lines have been transmitted or at the last annotation
+ line of the sequence (SQ or ORIGIN line). }
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ \code{getAnnot} returns a vector of character strings containing the
+ annotations for the sequences.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{query}}, \code{\link{SeqAcnucWeb}}, \code{\link{c2s}}, \code{\link{translate}} and \code{\link{prepgetannots}} to select the annotation lines.
+}
+\examples{
+#
+# List all available methods for getAnnot generic function:
+#
+ methods(getAnnot)
+#
+# SeqAcnucWeb class example:
+#
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ fc<-query("fc", "sp=felis catus et t=cds et O=mitochondrion et Y>2001 et no k=partial")
+ # get the first 5 lines annotating the first sequence:
+ annots <- getAnnot(fc$req[[1]], nbl = 5)
+ cat(annots, sep = "\n")
+ # or use the list method to get them all at once:
+ annots <- getAnnot(fc$req, nbl = 5)
+ cat(annots, sep = "\n")
+ closebank()
+ }
+#
+# SeqFastaAA class example:
+#
+ aafile <- system.file("sequences/seqAA.fasta", package = "seqinr")
+ sfaa <- read.fasta(aafile, seqtype = "AA")
+ getAnnot(sfaa[[1]])
+#
+# SeqFastadna class example:
+#
+ dnafile <- system.file("sequences/malM.fasta", package = "seqinr")
+ sfdna <- read.fasta(file = dnafile)
+ getAnnot(sfdna[[1]])
+#
+# Example with a FASTA file with multiple entries:
+#
+ ff <- system.file("sequences/someORF.fsa", package = "seqinr")
+ fs <- read.fasta(ff)
+ getAnnot(fs) # the list method is used here to get them all at once
+#
+# Default getAnnot method example. An error is produced because
+# there are no annotations by default:
+#
+ result <- try(getAnnot(letters))
+ stopifnot(!inherits("result", "try-error"))
+}
+\keyword{ utilities }
diff --git a/man/getFrag.Rd b/man/getFrag.Rd
new file mode 100644
index 0000000..57110b9
--- /dev/null
+++ b/man/getFrag.Rd
@@ -0,0 +1,59 @@
+\name{getFrag}
+\alias{getFrag}
+\alias{getFrag.default}
+\alias{getFrag.list}
+\alias{getFrag.character}
+\alias{getFrag.SeqFrag}
+\alias{getFrag.SeqAcnucWeb}
+\alias{getFrag.SeqFastadna}
+\alias{getFrag.SeqFastaAA}
+\alias{getFrag.qaw}
+\alias{getFrag.logical}
+
+\title{Generic function to extract sequence fragments}
+\description{
+getFrag is used to extract the sequence fragment starting at the \code{begin} position
+and ending at the \code{end} position.
+}
+\usage{
+getFrag(object, begin, end, ...)
+\method{getFrag}{SeqAcnucWeb}(object, begin, end, ..., socket = autosocket(), name = getName(object))
+\method{getFrag}{SeqFastadna}(object, begin, end, ..., name = getName(object))
+\method{getFrag}{SeqFastaAA}(object, begin, end, ..., name = getName(object))
+\method{getFrag}{SeqFrag}(object, begin, end, ..., name = getName(object))
+
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}
+ or \code{\link{SeqFastadna}}, or \code{\link{SeqFastaAA}} or \code{\link{SeqFrag}} or a list of these objects }
+ \item{begin}{ First position of the fragment to extract. This position is included. Numbering starts at 1.}
+ \item{end}{ Last position of the fragment to extract. This position is included. }
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database by \code{\link{choosebank}}).}
+ \item{name}{the sequence name}
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ \code{getFrag} returns an object of class \code{\link{SeqFrag}}.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}, \code{\link{SeqFastadna}}, \code{\link{SeqFastaAA}}, \code{\link{SeqFrag}}
+}
+\examples{
+#
+# List all available methods for getFrag generic function:
+#
+ methods(getFrag)
+#
+# Example with a DNA sequence from a FASTA file:
+#
+ dnafile <- system.file("sequences/malM.fasta", package = "seqinr")
+ sfdna <- read.fasta(file = dnafile)
+ myfrag <- getFrag(sfdna[[1]], begin = 1, end = 10)
+ stopifnot(getSequence(myfrag, as.string = TRUE) == "atgaaaatga")
+}
+\keyword{ utilities }
diff --git a/man/getKeyword.Rd b/man/getKeyword.Rd
new file mode 100644
index 0000000..8f140f1
--- /dev/null
+++ b/man/getKeyword.Rd
@@ -0,0 +1,53 @@
+\name{getKeyword}
+\alias{getKeyword}
+\alias{getKeyword.default}
+\alias{getKeyword.list}
+\alias{getKeyword.SeqAcnucWeb}
+\alias{getKeyword.qaw}
+\alias{getKeyword.logical}
+
+\title{Generic function to get keywords associated to sequences}
+\description{
+Get keywords from an ACNUC server.
+}
+\usage{
+getKeyword(object, ...)
+\method{getKeyword}{SeqAcnucWeb}(object, ..., socket = autosocket())
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}, or
+ a list of them, or the object resulting from \code{\link{query}}}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database by \code{\link{choosebank}}).}
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ \code{getKeyword} returns a vector of strings containing the keyword(s)
+ associated to a sequence.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}
+}
+\examples{
+#
+# List all available methods for getKeyword generic function:
+#
+ methods(getKeyword)
+#
+# Example of keyword extraction from an ACNUC server:
+#
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ fc<-query("fc", "sp=felis catus et t=cds et o=mitochondrion")
+ getKeyword(fc$req[[1]])
+ # Should be:
+ # [1] "DIVISION ORG" "RELEASE 62" "CYTOCHROME B" "SOURCE" "CDS"
+ closebank()
+}
+}
+\keyword{ utilities }
diff --git a/man/getLength.Rd b/man/getLength.Rd
new file mode 100644
index 0000000..d5043aa
--- /dev/null
+++ b/man/getLength.Rd
@@ -0,0 +1,58 @@
+\name{getLength}
+\alias{getLength}
+\alias{getLength.default}
+\alias{getLength.list}
+\alias{getLength.character}
+\alias{getLength.SeqFrag}
+\alias{getLength.SeqAcnucWeb}
+\alias{getLength.SeqFastadna}
+\alias{getLength.SeqFastaAA}
+\alias{getLength.qaw}
+\alias{getLength.logical}
+
+\title{Generic function to get the length of sequences}
+\description{
+getLength returns the total number of bases or amino-acids in a sequence.
+}
+\usage{
+getLength(object, ...)
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}
+ or \code{\link{SeqFastadna}}, or \code{\link{SeqFastaAA}} or \code{\link{SeqFrag}} or a list of these objects }
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ \code{getLength} returns a numeric vector giving the length of the
+ sequences.}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}, \code{\link{SeqFastadna}},
+ \code{\link{SeqFastaAA}}, \code{\link{SeqFrag}}
+}
+\examples{
+#
+# List all available methods for getLength generic function:
+#
+ methods(getLength)
+#
+# Example with seven DNA sequences from a FASTA file:
+#
+ ff <- system.file("sequences/someORF.fsa", package = "seqinr")
+ fs <- read.fasta(file = ff)
+ stopifnot(all(getLength(fs) == c(5573, 5825, 2987, 3929, 2648, 2597, 2780)))
+#
+# Example with 49 sequences from an ACNUC server:
+#
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ fc <- query("fc", "sp=felis catus et t=cds et o=mitochondrion")
+ getLength(fc)
+ closebank()
+}
+}
+\keyword{ utilities }
diff --git a/man/getLocation.Rd b/man/getLocation.Rd
new file mode 100644
index 0000000..e749b8d
--- /dev/null
+++ b/man/getLocation.Rd
@@ -0,0 +1,54 @@
+\name{getLocation}
+\alias{getLocation}
+\alias{getLocation.default}
+\alias{getLocation.list}
+\alias{getLocation.SeqAcnucWeb}
+\alias{getLocation.qaw}
+\alias{getLocation.logical}
+
+\title{Generic function to get the location of subsequences on the parent sequence}
+\description{
+This function works only with subsequences from an ACNUC server.
+}
+\usage{
+getLocation(object, ...)
+\method{getLocation}{SeqAcnucWeb}(object, ..., socket = autosocket())
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}, or a list of them,
+ or an object created by \code{\link{query}} }
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database by \code{\link{choosebank}}).}
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ A list giving the positions of the sequence
+ on the parent sequence. If the sequence is a subsequence (\emph{e.g.} coding
+ sequence), the function returns the position of each exon on the
+ parent sequence. NA is returned for parent sequences and a warning
+ is issued.}
+
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}
+}
+\examples{
+#
+# List all available methods for getLocation generic function:
+#
+ methods(getLocation)
+#
+# Example with a subsequence from an ACNUC server:
+#
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ fc <- query("fc", "sp=felis catus et t=cds et o=mitochondrion")
+ getLocation(fc$req[[5]])
+ closebank()
+}
+}
+\keyword{ utilities }
diff --git a/man/getName.Rd b/man/getName.Rd
new file mode 100644
index 0000000..7fb28e8
--- /dev/null
+++ b/man/getName.Rd
@@ -0,0 +1,57 @@
+\name{getName}
+\alias{getName}
+\alias{getName.default}
+\alias{getName.list}
+\alias{getName.SeqFrag}
+\alias{getName.SeqAcnucWeb}
+\alias{getName.SeqFastadna}
+\alias{getName.SeqFastaAA}
+\alias{getName.qaw}
+\alias{getName.logical}
+
+\title{Generic function to get the names of sequences}
+\description{
+getName returns the sequence names.
+}
+\usage{
+getName(object, ...)
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}
+ or \code{\link{SeqFastadna}}, or \code{\link{SeqFastaAA}} or \code{\link{SeqFrag}} or a list of these objects }
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ an object of class \code{character} containing the names of the sequences}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}, \code{\link{SeqFastadna}},
+ \code{\link{SeqFastaAA}}, \code{\link{SeqFrag}}
+}
+\examples{
+#
+# List all available methods for getName generic function:
+#
+ methods(getName)
+#
+# Example with seven DNA sequences from a FASTA file:
+#
+ ff <- system.file("sequences/someORF.fsa", package = "seqinr")
+ fs <- read.fasta(file = ff)
+ stopifnot(all(getName(fs) == c("YAL001C", "YAL002W", "YAL003W",
+ "YAL005C", "YAL007C", "YAL008W", "YAL009W")))
+#
+# Example with 49 sequences from an ACNUC server:
+#
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ fc <- query("fc", "sp=felis catus et t=cds et o=mitochondrion")
+ getName(fc)
+ closebank()
+}
+}
+\keyword{ utilities }
diff --git a/man/getSequence.Rd b/man/getSequence.Rd
new file mode 100644
index 0000000..9ae916b
--- /dev/null
+++ b/man/getSequence.Rd
@@ -0,0 +1,79 @@
+\name{getSequence}
+\alias{getSequence}
+\alias{getSequence.default}
+\alias{getSequence.list}
+\alias{getSequence.character}
+\alias{getSequence.SeqFrag}
+\alias{getSequence.SeqAcnucWeb}
+\alias{getSequence.SeqFastadna}
+\alias{getSequence.SeqFastaAA}
+\alias{getSequence.qaw}
+\alias{getSequence.logical}
+
+\title{Generic function to get sequence data}
+\description{
+getSequence returns the sequence either as a vector of single characters
+or as a single string of multiple characters.
+}
+\usage{
+getSequence(object, as.string = FALSE, ...)
+\method{getSequence}{SeqAcnucWeb}(object, as.string = FALSE, ..., socket = autosocket())
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}
+ or \code{\link{SeqFastadna}}, or \code{\link{SeqFastaAA}} or \code{\link{SeqFrag}} or a list of these objects, or an object of class \code{qaw} created by \code{\link{query}} }
+ \item{as.string}{if TRUE sequences are returned as strings of multiple characters
+ instead of a vector of single characters}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{...}{further arguments passed to or from other methods}
+}
+\value{
+ For a single sequence an object of class \code{character} containing the characters
+ of the sequence, either of length 1 when \code{as.string} is TRUE, or of the length
+ of the sequence when \code{as.string} is FALSE. For many sequences, a list of these.}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}, \code{\link{SeqFastadna}},
+ \code{\link{SeqFastaAA}}, \code{\link{SeqFrag}}
+}
+\examples{
+#
+# List all available methods for getSequence generic function:
+#
+ methods(getSequence)
+#
+# SeqAcnucWeb class example:
+#
+ \dontrun{# Need internet connection
+ choosebank("emblTP")
+ fc <- query("fc", "sp=felis catus et t=cds et o=mitochondrion")
+ getSequence(fc$req[[1]])
+ getSequence(fc$req[[1]], as.string = TRUE)
+ closebank()
+ }
+#
+# SeqFastaAA class example:
+#
+ aafile <- system.file("sequences/seqAA.fasta", package = "seqinr")
+ sfaa <- read.fasta(aafile, seqtype = "AA")
+ getSequence(sfaa[[1]])
+ getSequence(sfaa[[1]], as.string = TRUE)
+#
+# SeqFastadna class example:
+#
+ dnafile <- system.file("sequences/someORF.fsa", package = "seqinr")
+ sfdna <- read.fasta(file = dnafile)
+ getSequence(sfdna[[1]])
+ getSequence(sfdna[[1]], as.string = TRUE)
+#
+# SeqFrag class example:
+#
+ sfrag <- getFrag(object = sfdna[[1]], begin = 1, end = 10)
+ getSequence(sfrag)
+ getSequence(sfrag, as.string = TRUE)
+}
+\keyword{ utilities }
diff --git a/man/getTrans.Rd b/man/getTrans.Rd
new file mode 100644
index 0000000..4879469
--- /dev/null
+++ b/man/getTrans.Rd
@@ -0,0 +1,118 @@
+\name{getTrans}
+\alias{getTrans}
+\alias{getTrans.default}
+\alias{getTrans.list}
+\alias{getTrans.character}
+\alias{getTrans.SeqFrag}
+\alias{getTrans.SeqAcnucWeb}
+\alias{getTrans.SeqFastadna}
+\alias{getTrans.qaw}
+\alias{getTrans.logical}
+
+\title{Generic function to translate coding sequences into proteins}
+\description{
+This function translates nucleic acid sequences into the corresponding peptide sequence. It can translate in any of the 3 forward or three reverse sense frames. In the case of reverse sense, the reverse-complement of the sequence is taken. It can translate using the standard (universal) genetic code and also with non-standard codes. Ambiguous bases can also be handled.
+}
+\usage{
+getTrans(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...)
+\method{getTrans}{SeqAcnucWeb}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...,
+ frame = "auto", numcode = "auto")
+\method{getTrans}{SeqFastadna}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...,
+ frame = 0, numcode = 1)
+\method{getTrans}{SeqFrag}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...,
+ frame = 0, numcode = 1)
+
+}
+\arguments{
+ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}}
+ or \code{\link{SeqFastadna}}, or \code{\link{SeqFrag}} or a list of these objects, or an object of class \code{qaw} created by \code{\link{query}} }
+ \item{numcode}{ The ncbi genetic code number for translation. By default the standard genetic code is used, and for sequences
+ coming from an ACNUC server the relevant genetic code is used by default. }
+ \item{NAstring}{ How to translate amino-acids when there are ambiguous bases in codons. }
+ \item{ambiguous}{ If TRUE, ambiguous bases are taken into account so that for instance
+ GGN is translated to Gly in the standard genetic code. }
+ \item{frame}{ Frame(s) (0,1,2) to translate. By default the frame \code{0} is used. }
+ \item{sens}{ Direction for translation: \code{F} for the direct strand and \code{R} for the reverse complementary strand. }
+ \item{...}{further arguments passed to or from other methods}
+}
+\details{
+ The following genetic codes are described here. The number preceding each code
+ corresponds to \code{numcode}.
+ \describe{
+ \item{1}{ standard }
+ \item{2}{ vertebrate.mitochondrial }
+ \item{3}{ yeast.mitochondrial }
+ \item{4}{ protozoan.mitochondrial+mycoplasma }
+ \item{5}{ invertebrate.mitochondrial }
+ \item{6}{ ciliate+dasycladaceal }
+ \item{9}{ echinoderm+flatworm.mitochondrial }
+ \item{10}{ euplotid }
+ \item{11}{ bacterial+plantplastid }
+ \item{12}{ alternativeyeast }
+ \item{13}{ ascidian.mitochondrial }
+ \item{14}{ alternativeflatworm.mitochondrial }
+ \item{15}{ blepharism }
+ \item{16}{ chlorophycean.mitochondrial }
+ \item{21}{ trematode.mitochondrial }
+ \item{22}{ scenedesmus.mitochondrial }
+ \item{23}{ thraustochytrium.mitochondrial }
+ }
+}
+\value{
+ For a single sequence an object of class \code{character} containing the
+ one-letter codes of the translated amino-acids, one per element.
+ For many sequences, a list of these.}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry, L. Palmeira}
+\seealso{
+ \code{\link{SeqAcnucWeb}}, \code{\link{SeqFastadna}}, \code{\link{SeqFrag}}\cr
+ The genetic codes are given in the object \code{\link{SEQINR.UTIL}}, a more
+ human readable form is given by the function \code{\link{tablecode}}.
+ Use \code{\link{aaa}} to get the three-letter code for amino-acids.
+}
+\examples{
+#
+# List all available methods for getTrans generic function:
+#
+ methods(getTrans)
+#
+# Toy CDS example invented by Leonor Palmeira:
+#
+ toycds <- s2c("tctgagcaaataaatcgg")
+ getTrans(toycds) # should be c("S", "E", "Q", "I", "N", "R")
+#
+# Toy CDS example with ambiguous bases:
+#
+ toycds2 <- s2c("tcngarcarathaaycgn")
+ getTrans(toycds2) # should be c("X", "X", "X", "X", "X", "X")
+ getTrans(toycds2, ambiguous = TRUE) # should be c("S", "E", "Q", "I", "N", "R")
+ getTrans(toycds2, ambiguous = TRUE, numcode = 2) # should be c("S", "E", "Q", "X", "N", "R")
+#
+# Real CDS example:
+#
+ realcds <- read.fasta(file = system.file("sequences/malM.fasta", package ="seqinr"))[[1]]
+ getTrans(realcds)
+# Biologically correct, only one stop codon at the end
+ getTrans(realcds, frame = 3, sens = "R", numcode = 6)
+# Biologically meaningless, note the in-frame stop codons
+
+#
+# Complex transsplicing operations, the correct frame and the correct
+# genetic code are automatically used for translation into protein for
+# sequences coming from an ACNUC server:
+#
+\dontrun{
+ # Need internet connection.
+ # Translation of the following EMBL entry:
+ #
+ # FT CDS join(complement(153944..154157),complement(153727..153866),
+ # FT complement(152185..153037),138523..138735,138795..138955)
+ # FT /codon_start=1
+ choosebank("emblTP")
+ trans <- query("trans", "N=AE003734.PE35")
+ getTrans(trans$req[[1]])
+}
+}
+\keyword{ utilities }
diff --git a/man/getType.Rd b/man/getType.Rd
new file mode 100644
index 0000000..360933a
--- /dev/null
+++ b/man/getType.Rd
@@ -0,0 +1,30 @@
+\name{getType}
+\alias{getType}
+\title{To get available subsequence types in an opened ACNUC database}
+\description{
+ This function returns all subsequence types (e.g. CDS, TRNA) present
+ in an opened ACNUC database, using default database if no socket is
+ provided.
+}
+\usage{
+getType(socket = autosocket())
+}
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+\value{
+ a list containing a short description for each subsequence type.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{ \code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{
+# Need internet connection
+ choosebank("emblTP")
+ getType()}
+}
+\keyword{utilities}
diff --git a/man/getlistrank.Rd b/man/getlistrank.Rd
new file mode 100644
index 0000000..fd64dc5
--- /dev/null
+++ b/man/getlistrank.Rd
@@ -0,0 +1,46 @@
+\name{getlistrank}
+\alias{getlistrank}
+\alias{glr}
+\title{To get the rank of a list from its name}
+\description{
+This is a low level function to get the rank of a list on server from its name.
+}
+
+\usage{
+getlistrank(listname, socket = autosocket(), verbose = FALSE)
+glr(listname, socket = autosocket(), verbose = FALSE)
+}
+
+\arguments{
+ \item{listname}{the name of list on server}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{verbose}{if \code{TRUE}, verbose mode is on}
+}
+
+\details{
+ This low level function is usually not used directly by the user.
+}
+
+\value{
+ The rank of list named \code{listname} on server, or 0 if no list with this name exists.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{choosebank}}, \code{\link{query}}
+}
+
+\examples{
+ \dontrun{
+ # Need internet connection
+ choosebank("emblTP")
+ MyListName <- query("MyListName", "sp=Borrelia burgdorferi", virtual = TRUE)
+ (result <- getlistrank("MyListName"))
+ stopifnot(result == 2)
+ closebank()
+ }
+}
+\keyword{utilities}
diff --git a/man/getliststate.Rd b/man/getliststate.Rd
new file mode 100644
index 0000000..452975d
--- /dev/null
+++ b/man/getliststate.Rd
@@ -0,0 +1,51 @@
+\name{getliststate}
+\alias{getliststate}
+\alias{gls}
+\alias{gln}
+\title{Asks for information about an ACNUC list of specified rank}
+\description{
+Reply gives the type of list, its name, the number of elements it contains,
+and, for sequence lists, says whether the list contains only parent seqs (locus=T).
+}
+\usage{
+getliststate(lrank, socket = autosocket())
+gls(lrank, socket = autosocket())
+gln(lrank, ...)
+}
+\arguments{
+ \item{lrank}{the rank of the ACNUC list}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{...}{arguments passed to getliststate}
+}
+
+\value{
+NA in case of problem and a warning is issued. When there is no problem
+a list with the following 4 components:
+
+ \item{type}{string. Type of ACNUC list (SQ, KW, SP)}
+ \item{name}{string. ACNUC list name}
+ \item{count}{numeric. Number of elements in ACNUC list}
+ \item{locus}{logical. For ACNUC sequence lists TRUE means that the list
+ contains only parent sequences. NA otherwise.}
+
+\code{gln} is a shortcut for \code{getliststate(lrank, ...)$name}
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}}, \code{\link{alr}},
+\code{\link{glr}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "sp=felis catus et t=cds", virtual=TRUE)
+ getliststate(glr("mylist")) # SQ, MYLIST, 603, FALSE
+ gln(glr("mylist")) # MYLIST (upper case letters on server)
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/gfrag.Rd b/man/gfrag.Rd
new file mode 100644
index 0000000..c006971
--- /dev/null
+++ b/man/gfrag.Rd
@@ -0,0 +1,37 @@
+\name{gfrag}
+\alias{gfrag}
+\title{Extract sequence identified by name or by number from an ACNUC server}
+\description{
+Get \code{length} characters from sequence identified by name or by number
+starting from position \code{start} (counted from 1).
+}
+\usage{
+gfrag(what, start, length, idby = c("name", "number"), socket = autosocket())
+}
+\arguments{
+ \item{what}{A sequence name or number}
+ \item{start}{Start position from 1}
+ \item{length}{Number of requested characters (answer may be shorter)}
+ \item{idby}{Is the sequence identified by name or number? Default to name}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+\value{
+A string of characters with at most \code{length} characters (may be
+shorter than asked for). NA is returned and a warning is issued in
+case of problem (non existent sequence for instance).
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{# Need internet connection
+ choosebank("emblTP")
+ gfrag("LMFLCHR36", start = 1, length = 3529852) -> myseq
+ stopifnot(nchar(myseq) == 3529852)
+ closebank()
+ }
+}
diff --git a/man/ghelp.Rd b/man/ghelp.Rd
new file mode 100644
index 0000000..d73e9cc
--- /dev/null
+++ b/man/ghelp.Rd
@@ -0,0 +1,43 @@
+\name{ghelp}
+\alias{ghelp}
+\title{Get help from an ACNUC server}
+\description{
+Reads one item of information in specified help file from an ACNUC server.
+There are differences between ACNUC clients so that this help could be
+confusing. However, the query language is common to all clients so that
+the most recent documentation is most likely here.
+}
+\usage{
+ghelp(item = c("GENERAL", "SELECT", "SPECIES", "KEYWORD"),
+ file = c("HELP", "HELP_WIN"), socket = autosocket(), catresult = TRUE)
+
+}
+\arguments{
+ \item{item}{the name of the desired help item}
+ \item{file}{the name of the help file on server side.}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{catresult}{logical. If TRUE output is redirected to the console.}
+}
+
+\value{
+A vector of strings which is returned invisibly and "cated" to the console
+by default.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ ghelp()
+ ghelp("SELECT")
+ # To get info about current database:
+ ghelp("CONT")
+ }
+}
+\keyword{ utilities }
diff --git a/man/gs500liz.Rd b/man/gs500liz.Rd
new file mode 100644
index 0000000..7632a37
--- /dev/null
+++ b/man/gs500liz.Rd
@@ -0,0 +1,43 @@
+\name{gs500liz}
+\alias{gs500liz}
+\docType{data}
+\title{GS500LIZ size standards}
+\description{
+GS500LIZ is an internal size standard often used in capillary electrophoresis.
+It contains 16 fragments ranging in size from 35 to 500 bp. Note that they are not
+all used for calibration : fragments at 250 and 340 bp may migrate anomalously
+(most likely because of secondary structure formation).
+}
+\usage{data(gs500liz)}
+\format{
+ A list with 3 components.
+ \describe{
+ \item{liz}{a vector of 16 values for the fragment sizes in bp.}
+ \item{mask1}{a vector of 16 logicals to remove fragments whose
+ migration may be anomalous (250 and 340 bp).}
+ \item{mask2}{a vector of 16 logicals to remove extreme fragments (35, 50,
+ 490, 500 bp) so that the resulting fragments are in the 75-450 bp range.}
+ }
+}
+
+\examples{
+data(gs500liz)
+op <- par(no.readonly = TRUE)
+par(lend = "butt", mar = c(5,0,4,0)+0.1)
+x <- gs500liz$liz
+n <- length(x)
+y <- rep(1, n)
+plot(x, y, type = "h", yaxt = "n", xlab = "Fragment size [bp]",
+ main = "GS500LIZ size standard", lwd = 2)
+x1 <- x[!gs500liz$mask1]
+segments(x1, 0, x1, 1, col = "red", lwd = 2)
+x2 <- x[!gs500liz$mask2]
+segments(x2, 0, x2, 1, col = "blue", lwd = 2)
+col <- rep("black", n)
+col[!gs500liz$mask1] <- "red"
+col[!gs500liz$mask2] <- "blue"
+text(x,1.05,paste(x, "bp"), srt = 90, col = col)
+legend("top", inset = 0.1, legend = c("regular", "imprecise (mask1)", "extreme (mask2)"),
+ lwd = 2, col = c("black","red","blue"))
+par(op)
+}
diff --git a/man/identifiler.Rd b/man/identifiler.Rd
new file mode 100644
index 0000000..470e544
--- /dev/null
+++ b/man/identifiler.Rd
@@ -0,0 +1,27 @@
+\name{identifiler}
+\alias{identifiler}
+\docType{data}
+\title{Identifiler allele names}
+\description{
+Names of the alleles in the Applied Biosystems identifiler allelic ladder.
+}
+\usage{data(identifiler)}
+\format{
+ A list with 4 components for the four fluorochromes.
+ \describe{
+ \item{FAM}{a list of 4 loci}
+ \item{VIC}{a list of 5 loci}
+ \item{NED}{a list of 4 loci}
+ \item{PET}{a list of 3 loci}
+ }
+}
+
+\examples{
+data(identifiler)
+op <- par(no.readonly = TRUE)
+par(mar = c(3,8,4,2)+0.1)
+allcount <- unlist(lapply(identifiler, function(x) lapply(x, length)))
+barplot(allcount[order(allcount)], horiz = TRUE, las = 1,
+main = "Allele count per locus", col = "lightblue")
+par(op)
+}
diff --git a/man/isenum.Rd b/man/isenum.Rd
new file mode 100644
index 0000000..462680f
--- /dev/null
+++ b/man/isenum.Rd
@@ -0,0 +1,61 @@
+\name{isenum}
+\alias{isenum}
+\alias{isn}
+\alias{getNumber.socket}
+\alias{getAttributsocket}
+\title{Get the ACNUC number of a sequence from its name or accession number}
+\description{
+Gives the ACNUC number of a sequence in the \code{number} element of the returned list.
+More information is returned for subsequences corresponding to coding sequences.
+}
+\usage{
+isenum(what, idby = c("name", "access"), socket = autosocket())
+isn(what, ...)
+getNumber.socket(socket, name)
+getAttributsocket(socket, name)
+}
+\arguments{
+ \item{what}{a sequence name or a sequence accession number}
+ \item{idby}{is the sequence identified by name or by accession number? Default to name}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{...}{arguments passed to \code{isenum}.}
+ \item{name}{a sequence name.}
+}
+\value{
+A list with the following 6 components:
+
+ \item{number}{numeric. The ACNUC number of the sequence.}
+ \item{length}{numeric. The length of the sequence.}
+ \item{frame}{numeric. The reading frame (0, 1, or 2) of the sequence for CDS.}
+ \item{gencode}{numeric. ACNUC's genetic code (0 means universal) of the sequence for CDS.}
+ \item{ncbigc}{numeric. NCBI's genetic code (0 means universal) of the sequence for CDS.}
+ \item{otheraccessmatches}{logical. If TRUE it means that several sequences are attached
+ to the given accession number, and that only the ACNUC number of the first attached
+ sequence is returned in the \code{number} component of the list.}
+
+\code{isn(what, ...)} is a shortcut for \code{isenum(what, ...)$number}.\cr
+
+As from seqinR 1.1-3 \code{getNumber.socket} and
+\code{getAttributsocket} are deprecated (a warning is issued).
+}
+
+
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ isenum("LMFLCHR36")
+ isn("LMFLCHR36")
+ stopifnot(isn("LMFLCHR36") == 13682678)
+ # Example with CDS:
+ isenum("AB004237")
+ }
+}
+\keyword{ utilities }
diff --git a/man/kaks.Rd b/man/kaks.Rd
new file mode 100644
index 0000000..47cccaa
--- /dev/null
+++ b/man/kaks.Rd
@@ -0,0 +1,101 @@
+\name{kaks}
+\alias{kaks}
+\title{Ka and Ks, also known as dn and ds, computation}
+\description{ Ks and Ka are, respectively, the number of substitutions per synonymous site and per non-synonymous site between two protein-coding genes. They are also denoted as ds and dn in the literature. The ratio of nonsynonymous (Ka) to synonymous (Ks) nucleotide substitution rates is an indicator of selective pressures on genes. A ratio significantly greater than 1 indicates positive selective pressure. A ratio around 1 indicates either neutral evolution at the protein level or an [...]
+}
+\usage{
+kaks(x,verbose = FALSE, debug = FALSE, forceUpperCase = TRUE)
+}
+\arguments{
+ \item{x}{ An object of class \code{alignment}, obtained for instance by importing into R the data from an alignment file with the \code{\link{read.alignment}} function. This is typically a set of coding sequences aligned at the protein level, see \code{\link{reverse.align}}.}
+ \item{verbose}{ If TRUE add to the results the value of L0, L2, L4 (respectively the number of non-synonymous sites, of 2-fold synonymous sites, of 4-fold synonymous sites), A0, A2, A4 (respectively the number of transitional changes at non-synonymous, 2-fold, and 4-fold synonymous sites ) and B0, B2, B4 (respectively the number of transversional changes at non-synonymous, 2-fold, and 4-fold synonymous sites).}
+ \item{debug}{ If TRUE turns debug mode on.}
+ \item{forceUpperCase}{ If TRUE, the default value, all characters in sequences are forced to the upper case
+ if at least one 'a', 'c', 'g', or 't' is found in the sequences.
+ Turning it to FALSE if the sequences are already in upper case will save time.}
+}
+\value{
+ \item{ ks }{ matrix of Ks values }
+ \item{ ka }{ matrix of Ka values }
+ \item{ vks }{ variance matrix of Ks }
+ \item{ vka }{ variance matrix of Ka }
+}
+\references{
+Li, W.-H. (1993) Unbiased estimation of the rates of synonymous and nonsynonymous substitution.
+\emph{J. Mol. Evol.}, \bold{36}:96-99.\cr
+
+Hurst, L.D. (2002) The Ka/Ks ratio: diagnosing the form of sequence evolution.
+\emph{Trends Genet.}, \bold{18}:486-486.\cr
+
+The C program implementing this method was provided by Manolo Gouy. More info is
+needed here to trace back the original C source so as to credit correct source.
+The original FORTRAN-77 code by Chung-I Wu modified by Ken Wolfe is available
+here \url{http://wolfe.gen.tcd.ie/lab/pub/li93/}.\cr
+
+For a more recent discussion about the estimation of Ka and Ks see:\cr
+
+Tzeng, Y.H., Pan, R., Li, W.-H. (2004) Comparison of three methods for estimating
+rates of synonymous and nonsynonymous nucleotide substitutions.
+\emph{Mol. Biol. Evol}, \bold{21}:2290-2298.\cr
+
+The method implemented here is noted LWL85 in the above paper.\cr
+
+To cite this package in a publication, as any R package, try something as \code{citation("seqinr")}
+at your R prompt.
+}
+\note{
+Computing Ka and Ks makes sense for coding sequences that have been aligned at the amino-acid level before retro-translating the alignment at the nucleic acid level to ensure that sequences are compared on a codon-by-codon basis. Function \code{\link{reverse.align}} may help for this.
+
+As from seqinR 2.0-3, when there is at least one non ACGT base in a codon, this codon is considered as a gap-codon (\code{---}). This makes the computation more robust with respect to alignments with out-of-frame gaps, see example section.
+
+Gap-codons (\code{---}) are not used for computations.
+
+When the alignment does not contain enough information (\emph{i.e.} close to saturation), the Ka and Ks values are forced to 10.
+
+Negative values indicate that Ka and Ks can not be computed.
+
+According to Li (1993) J. Mol. Evol. 36(1):96-99,
+the rate of synonymous substitutions Ks is computed as:
+Ks = (L2.A2 + L4.A4) / (L2 + L4) + B4
+
+and the rate of non-synonymous substitutions Ka is computed as:
+Ka = A0 + (L0.B0 + L2.B2) / (L0 + L2)
+
+ }
+\author{D. Charif, J.R. Lobry}
+\seealso{\code{\link{read.alignment}} to import alignments from files, \code{\link{reverse.align}} to align CDS at the aa level.}
+\examples{
+ #
+ # Simple Toy example:
+ #
+ s <- read.alignment(file = system.file("sequences/test.phylip", package = "seqinr"),
+ format = "phylip")
+ kaks(s)
+ #
+ # Check numeric results on a simple test example:
+ #
+ data(AnoukResult)
+ Anouk <- read.alignment(file = system.file("sequences/Anouk.fasta", package = "seqinr"),
+ format = "fasta")
+## if( ! all.equal(kaks(Anouk), AnoukResult) ) {
+## warning("Poor numeric results with respect to AnoukResult standard")
+## } else {
+## print("Results are consistent with AnoukResult standard")
+## }
+#
+# As from seqinR 2.0-3 the following alignment with out-of-frame gaps
+# should return a zero Ka value.
+#
+# >Reference
+# ATGTGGTCGAGATATCGAAAGCTAGGGATATCGATTATATATAGCAAGATCGATAGAGGA
+# TCGATGATCGATCGGGATCGACAGCTG
+# >With out-of-frame gaps
+# AT-TGGTCCAGGTATCGTAAGCTAGGGATATCGATTATATATAGCAAGATCGATAGGGGA
+# TCGATGATCGATCGGGA--GACAGCTG
+#
+# This test example provided by Darren Obbard is now used as a routine check:
+#
+ Darren <- read.alignment(file = system.file("sequences/DarrenObbard.fasta", package = "seqinr"),
+ format = "fasta")
+ stopifnot( all.equal(kaks(Darren)$ka[1], 0) )
+}
diff --git a/man/knowndbs.Rd b/man/knowndbs.Rd
new file mode 100644
index 0000000..6dc8801
--- /dev/null
+++ b/man/knowndbs.Rd
@@ -0,0 +1,52 @@
+\name{knowndbs}
+\alias{knowndbs}
+\alias{kdb}
+\title{Description of databases known by an ACNUC server}
+\description{
+Returns, for each database known by the server, its name (a valid value for the \code{bank}
+argument of \code{\link{choosebank}}), availability (off means temporarily unavailable),
+and description.
+}
+\usage{
+knowndbs(tag = c(NA, "TP", "TEST", "DEV"), socket = autosocket())
+kdb(tag = c(NA, "TP", "TEST", "DEV"), socket = autosocket())
+}
+\arguments{
+ \item{tag}{default to NA, see details}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+
+\details{
+When the optional \code{tag} argument is used, only databases tagged with the given
+string are listed;
+when this argument is NA (by default), only untagged databases are listed.
+The \code{tag} argument thus allows to identify series of special purpose (tagged) databases,
+in addition to default (untagged) ones.
+}
+
+\value{
+A dataframe with 3 columns:
+\item{bank}{string. Valid bank values known by the ACNUC server}
+\item{status}{string. "on" means available, "off" means temporarily unavailable}
+\item{info}{string. short description of the database}
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+
+The full list of untagged and tagged databases is here :
+\url{http://doua.prabi.fr/databases/acnuc/banques_raa.php}.
+
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}} when called without arguments. }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ kdb()
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/lseqinr.Rd b/man/lseqinr.Rd
new file mode 100644
index 0000000..e988739
--- /dev/null
+++ b/man/lseqinr.Rd
@@ -0,0 +1,23 @@
+\name{lseqinr}
+\alias{lseqinr}
+\title{ To see what's inside the package seqinr }
+\description{
+ This is just a shortcut for ls("package:seqinr")
+}
+\usage{
+lseqinr()
+}
+\value{
+ The list of objects in the package seqinr
+}
+\references{
+ \code{citation("seqinr")}
+}
+\note{
+Use \code{library(help=seqinr)} to have a summary of the functions available in the package.
+}
+\author{J.R. Lobry}
+\examples{
+lseqinr()
+}
+\keyword{utilities}
diff --git a/man/m16j.Rd b/man/m16j.Rd
new file mode 100644
index 0000000..c42f3b0
--- /dev/null
+++ b/man/m16j.Rd
@@ -0,0 +1,101 @@
+\name{m16j}
+\alias{m16j}
+\docType{data}
+\title{Fragment of the E. coli chromosome}
+\description{
+ A fragment of the \emph{E. coli} chromosome that was used in Lobry (1996) to show the
+change in GC skew at the origin of replication (\emph{i.e.} the chirochore structure
+of bacterial chromosomes)
+}
+\usage{data(m16j)}
+\format{
+ A string of 1,616,539 characters
+}
+\details{
+The sequence used in Lobry (1996) was a 1,616,174 bp fragment obtained from the concatenation
+of nine overlapping sequences (U18997, U00039, L10328, M87049, L19201, U00006, U14003,
+D10483, D26562). Ambiguities have been resolved since then and it was
+a chimeric sequence from K-12 strains MG1655 and W3110, the sequence used here is
+from strain MG1655 only (Blattner \emph{et al.} 1997).
+
+The chirochore structure of bacterial genomes is illustrated below by a screenshot
+of a part of figure 1 from Lobry (1996). See the example section to reproduce this
+figure.
+
+\if{html}{\figure{gcskewmbe96.pdf}{options: width=400}}
+\if{latex}{\figure{gcskewmbe96.pdf}{options: width=12cm}}
+
+
+}
+\source{
+\emph{Escherichia coli} K-12 strain MG1655. Fragment from U00096 from the
+EBI Genome Reviews. Acnuc Release 7. Last Updated: Feb 26, 2007.
+XX
+DT 18-FEB-2004 (Rel. .1, Created)
+DT 09-JAN-2007 (Rel. 65, Last updated, Version 70)
+XX
+}
+\references{
+Lobry, J.R. (1996) Asymmetric substitution patterns in the two DNA strands of
+bacteria. \emph{Molecular Biology and Evolution}, \bold{13}:660-665.\cr
+
+F.R. Blattner, G. Plunkett III, C.A. Bloch, N.T. Perna, V. Burland, M. Rilley,
+J. Collado-Vides, J.D. Glasner, C.K. Rode, G.F. Mayhew, J. Gregor,
+N.W. Davis, H.A. Kirkpatrick, M.A. Goeden, D.J. Rose, B. Mau, and
+Y. Shao. (1997) The complete genome sequence of \emph{Escherichia coli} K-12.
+\emph{Science}, \bold{277}:1453-1462\cr
+
+\code{citation("seqinr")}
+}
+\examples{
+#
+# Load data:
+#
+data(m16j)
+#
+# Define a function to compute the GC skew:
+#
+gcskew <- function(x) {
+ if (!is.character(x) || length(x) > 1)
+ stop("single string expected")
+ tmp <- tolower(s2c(x))
+ nC <- sum(tmp == "c")
+ nG <- sum(tmp == "g")
+ if (nC + nG == 0)
+ return(NA)
+ return(100 * (nC - nG)/(nC + nG))
+}
+#
+# Moving window along the sequence:
+#
+step <- 10000
+wsize <- 10000
+starts <- seq(from = 1, to = nchar(m16j), by = step)
+starts <- starts[-length(starts)]
+n <- length(starts)
+result <- numeric(n)
+for (i in seq_len(n)) {
+ result[i] <- gcskew(substr(m16j, starts[i], starts[i] + wsize - 1))
+}
+#
+# Plot the result:
+#
+xx <- starts/1000
+yy <- result
+n <- length(result)
+hline <- 0
+plot(yy ~ xx, type = "n", axes = FALSE, ann = FALSE, ylim = c(-10, 10))
+polygon(c(xx[1], xx, xx[n]), c(min(yy), yy, min(yy)), col = "black", border = NA)
+usr <- par("usr")
+rect(usr[1], usr[3], usr[2], hline, col = "white", border = NA)
+lines(xx, yy)
+abline(h = hline)
+box()
+axis(1, at = seq(0, 1600, by = 200))
+axis(2, las = 1)
+title(xlab = "position (Kbp)", ylab = "(C-G)/(C+G) [percent]",
+ main = expression(paste("GC skew in ", italic(Escherichia~coli))))
+arrows(860, 5.5, 720, 0.5, length = 0.1, lwd = 2)
+text(860, 5.5, "origin of replication", pos = 4)
+}
+\keyword{datasets}
diff --git a/man/mase.Rd b/man/mase.Rd
new file mode 100644
index 0000000..ed4e47c
--- /dev/null
+++ b/man/mase.Rd
@@ -0,0 +1,10 @@
+\name{mase}
+ \docType{data}
+ \alias{mase}
+ \title{Example of results obtained after a call to read.alignment}
+ \description{This data set gives an example of a protein alignment obtained after a call to the function read.alignment on an alignment file in "mase" format.}
+ \usage{mase}
+ \format{A List of class alignment}
+ \source{http://www.clustal.org/}
+ \references{Faulkner, D.V. and Jurka, J. (1988) \emph{Multiple sequences alignment editor (MASE).} Trends Biochem. Sci., 13, 321-322.}
+ \keyword{datasets}
diff --git a/man/modifylist.Rd b/man/modifylist.Rd
new file mode 100644
index 0000000..4fc7d11
--- /dev/null
+++ b/man/modifylist.Rd
@@ -0,0 +1,85 @@
+\name{modifylist}
+\alias{modifylist}
+\title{Modification of an ACNUC list}
+\description{
+This function modifies a previously existing ACNUC list by selecting sequences
+either by length, by date, or for the presence of a given string in annotations.
+}
+\usage{
+modifylist(listname, modlistname = listname, operation,
+ type = c("length", "date", "scan"), socket = autosocket(),
+ virtual = FALSE, verbose = FALSE)
+}
+\arguments{
+ \item{listname}{the name of the ACNUC list to modify}
+ \item{modlistname}{the name of the modified ACNUC list. Default is to use the
+ same list name so that previous list is lost.}
+ \item{operation}{a character string describing the operation to be done, see details.}
+ \item{type}{the type of operation, could be one of \code{ "length", "date", "scan"}.
+ Default is \code{"length"}}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{virtual}{if TRUE, no attempt is made to retrieve the information about all the elements of the list. In this case, the \code{req} component of the list is set to NA.}
+ \item{verbose}{logical, if TRUE mode verbose is on}
+}
+
+\details{
+Example of possible values for the argument \code{operation}:
+
+ \describe{
+ \item{length}{as in "> 10000" or "< 500"}
+ \item{date}{as in "> 1/jul/2001" or "< 30/AUG/98"}
+ \item{scan}{specify the string to be searched for}
+ }
+The character < is to be understood as <= and, likewise, > as >=.
+}
+
+\value{
+The result is directly assigned to the object \code{modlistname} in the user workspace.
+This is an object of class \code{qaw}, a list with the following 6 components:
+
+ \item{call}{the original call}
+ \item{name}{the ACNUC list name}
+ \item{nelem}{the number of elements (for instance sequences) in the ACNUC list}
+ \item{typelist}{the type of the elements of the list. Could be SQ for a list of
+ sequence names, KW for a list of keywords, SP for a list of species names.}
+ \item{req}{a list of sequence names that fit the required criteria or \code{NA} when
+ called with parameter \code{virtual} set to \code{TRUE}}
+ \item{socket}{the socket connection that was used}
+
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} and
+ \code{\link{prepgetannots}} to select the annotation lines for scan.}
+\examples{
+\dontrun{ # Need internet connection
+ choosebank("emblTP") # open the emblTP bank used by all the requests below
+ mylist <- query("mylist", "sp=felis catus et t=cds", virtual=TRUE)
+ mylist$nelem # 603 sequences
+ stopifnot(mylist$nelem == 603)
+
+ # select sequences with at least 1000 bp:
+ mylist <- modifylist("mylist", operation = ">1000", virtual = TRUE)
+ mylist$nelem # now, only 132 sequences
+ stopifnot(mylist$nelem == 132)
+
+ # scan for "felis" in annotations:
+ mylist <- modifylist("mylist", op = "felis", type = "scan", virtual = TRUE)
+ mylist$nelem # now, only 33 sequences
+ stopifnot(mylist$nelem == 33)
+
+ # modify by date, keeping sequences dated 1/jul/2001 or later:
+ mylist <- modifylist("mylist", op = "> 1/jul/2001", type = "date", virtual = TRUE)
+ mylist$nelem # now, only 15 sequences
+ stopifnot(mylist$nelem == 15)
+
+ # Summary of current ACNUC lists, one list called MYLIST on server:
+ sapply(alr()$rank, getliststate)
+ closebank()
+ }
+}
+\keyword{ utilities }
diff --git a/man/move.Rd b/man/move.Rd
new file mode 100644
index 0000000..f258c42
--- /dev/null
+++ b/man/move.Rd
@@ -0,0 +1,43 @@
+\name{move}
+\alias{move}
+\alias{mv}
+\title{Rename an R object}
+\description{
+Rename object \code{from} into \code{to}.
+}
+\usage{
+move(from, to)
+mv(from, to)
+}
+\arguments{
+ \item{from}{an R object name}
+ \item{to}{the new R object name}
+}
+\value{
+none.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{swap}} }
+
+\examples{
+#
+# Example in a new empty environment:
+#
+local({
+ zefplock <- pi
+ print(ls())
+ print(zefplock)
+ mv(zefplock, toto) # rename zefplock into toto
+ print(ls())
+ print(toto)
+ stopifnot(identical(toto, pi)) # Sanity check
+})
+#
+# Check that self-assignment is possible:
+#
+mv(mv, mv) # force self-assignment for the function itself
+mv(mv, mv) # OK, function mv() still exists
+}
diff --git a/man/msf.Rd b/man/msf.Rd
new file mode 100644
index 0000000..94ba0f8
--- /dev/null
+++ b/man/msf.Rd
@@ -0,0 +1,9 @@
+\name{msf}
+ \docType{data}
+ \alias{msf}
+ \title{Example of results obtained after a call to read.alignment}
+ \description{This data set gives an example of a protein alignment obtained after a call to the function read.alignment on an alignment file in "msf" format.}
+ \usage{msf}
+ \format{A List of class alignment}
+ \source{http://www.ebi.ac.uk/2can/tutorials/formats.html#MSF/}
+ \keyword{datasets}
diff --git a/man/n2s.Rd b/man/n2s.Rd
new file mode 100644
index 0000000..3491eb7
--- /dev/null
+++ b/man/n2s.Rd
@@ -0,0 +1,40 @@
+\name{n2s}
+\alias{n2s}
+\title{ function to convert the numeric encoding of a DNA sequence
+into a vector of characters }
+\description{
+By default, if no \code{levels} argument is provided, this function
+will just transform your vector of integers into a DNA sequence
+according to the lexical order: \code{0 -> "a"}, \code{1 -> "c"}, \code{ 2 -> "g"},
+\code{3 -> "t"}, \code{others -> NA}.
+}
+\usage{
+n2s(nseq, levels = c("a", "c", "g", "t"), base4 = TRUE)
+}
+\arguments{
+ \item{nseq}{A vector of integers }
+ \item{levels}{the translation vector }
+ \item{base4}{when this logical is true, the numerical encoding of
+\code{levels} starts at 0, when it is false the numerical encoding of
+\code{levels} starts at 1.}
+}
+\value{
+a vector of characters
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{s2n}} }
+\examples{
+## Default behaviour: codes 0 to 3 are decoded with levels a, c, g, t
+nseq <- sample(0:3, size = 100, replace = TRUE)
+n2s(nseq)
+# Effect of NA and of an out-of-range code
+# (they are put in the first two positions):
+nseq[1:2] <- c(NA, 777)
+n2s(nseq)[1:10]
+# Same decoding, but with an RNA alphabet:
+n2s(nseq, levels = c("a", "c", "g", "u"))
+}
+\keyword{ utilities }
diff --git a/man/oriloc.Rd b/man/oriloc.Rd
new file mode 100644
index 0000000..9be6030
--- /dev/null
+++ b/man/oriloc.Rd
@@ -0,0 +1,147 @@
+\name{oriloc}
+\alias{oriloc}
+\title{ Prediction of origin and terminus of replication in bacteria.}
+\description{
+This program finds the putative origin and terminus of
+replication in prokaryotic genomes. The program discriminates
+between codon positions.
+}
+\usage{
+oriloc(seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ g2.coord = system.file("sequences/ct.predict", package = "seqinr"),
+ glimmer.version = 3,
+oldoriloc = FALSE, gbk = NULL, clean.tmp.files = TRUE, rot = 0)
+}
+\arguments{
+ \item{seq.fasta}{Character: the name of a file which contains the DNA sequence
+ of a bacterial chromosome in fasta format. The default value,
+ \code{system.file("sequences/ct.fasta.gz", package ="seqinr")} is
+ the fasta file \code{ct.fasta.gz}. This is the file
+ for the complete genome sequence of \emph{Chlamydia trachomatis}
+ that was used in Frank and Lobry (2000). You can replace
+ this by something like \code{seq.fasta = "myseq.fasta"} to work
+ with your own data if the file \code{myseq.fasta} is present in
+ the current working directory (see \code{\link{getwd}}), or give
+ a full path access to the sequence file (see \code{\link{file.choose}}).}
+ \item{g2.coord}{Character: the name of file which contains the output of
+ glimmer program (\code{*.predict} in glimmer version 3)}
+ \item{glimmer.version}{Numeric: glimmer version used, could be 2 or 3}
+ \item{oldoriloc}{Logical: to be set at TRUE to reproduce the
+(deprecated) outputs of previous (publication date: 2000) version
+of the oriloc program.}
+ \item{gbk}{Character: the URL of a file in GenBank format. When provided
+ \code{oriloc} uses as input a single GenBank file instead of the \code{seq.fasta}
+ and the \code{g2.coord}. A local temporary copy of the GenBank file is
+ made with \code{\link{download.file}} if \code{gbk} starts with
+ \code{http://} or \code{ftp://} or \code{file://} and with
+ \code{\link{file.copy}} otherwise. The local copy is then used as
+ input for \code{\link{gb2fasta}} and \code{\link{gbk2g2}} to produce
+ a fasta file and a glimmer-like (version 2) file, respectively, to be used
+ by oriloc instead of \code{seq.fasta} and \code{g2.coord} .}
+ \item{clean.tmp.files}{Logical: if TRUE temporary files generated when
+ working with a GenBank file are removed.}
+ \item{rot}{Integer, with zero default value, used to circularly permute the genome. }
+}
+\details{
+The method builds on the fact that there are compositional asymmetries between
+the leading and the lagging strand for replication. The program works only
+with third codon positions so as to increase the signal/noise ratio.
+To discriminate between codon positions, the program uses as input either
+an annotated GenBank file, or a fasta file and a glimmer2.0 (or
+glimmer3.0) output
+file.
+}
+\value{
+ A data.frame with seven columns: \code{g2num} for the CDS number in
+the \code{g2.coord} file, \code{start.kb} for the start position of CDS
+expressed in Kb (this is the position of the first occurrence of a
+nucleotide in a CDS \emph{regardless} of its orientation), \code{end.kb}
+for the last position of a CDS, \code{CDS.excess} for the DNA walk for
+gene orientation (+1 for a CDS in the direct strand, -1 for a CDS in
+the reverse strand) cumulated over genes, \code{skew} for the cumulated
+composite skew in third codon positions, \code{x} for the cumulated
+T - A skew in third codon positions, \code{y} for the cumulated C - G
+skew in third codon positions.
+}
+\references{
+
+More illustrated explanations to help understand oriloc outputs
+are available there:
+\url{https://pbil.univ-lyon1.fr/software/Oriloc/howto.html}.\cr
+
+Examples of oriloc outputs on real sequence data are there:
+\url{https://pbil.univ-lyon1.fr/software/Oriloc/index.html}.\cr
+
+The original paper for oriloc:\cr
+Frank, A.C., Lobry, J.R. (2000) Oriloc: prediction of replication
+boundaries in unannotated bacterial chromosomes. \emph{Bioinformatics},
+\bold{16}:560-561.\cr
+\url{http://bioinformatics.oupjournals.org/cgi/reprint/16/6/560}\cr\cr
+
+A simple informal introduction to DNA-walks:\cr
+Lobry, J.R. (1999) Genomic landscapes. \emph{Microbiology Today},
+\bold{26}:164-165.\cr
+\url{http://www.microbiologysociety.org/download.cfm/docid/C66F92D0-6292-4CB0-83E344560770384C}\cr\cr
+
+An early and somewhat historical application of DNA-walks:\cr
+Lobry, J.R. (1996) A simple vectorial representation of DNA sequences
+for the detection of replication origins in bacteria. \emph{Biochimie},
+\bold{78}:323-326.\cr
+
+Glimmer, a very efficient open source software for the prediction of CDS from scratch
+in prokaryotic genomes, is described at \url{http://www.cbcb.umd.edu/software/glimmer/}.\cr
+For a description of Glimmer 1.0 and 2.0 see:\cr
+
+Delcher, A.L., Harmon, D., Kasif, S., White, O., Salzberg, S.L. (1999)
+Improved microbial gene identification with GLIMMER,
+\emph{Nucleic Acids Research}, \bold{27}:4636-4641.\cr
+
+Salzberg, S., Delcher, A., Kasif, S., White, O. (1998)
+Microbial gene identification using interpolated Markov models,
+\emph{Nucleic Acids Research}, \bold{26}:544-548.\cr
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry, A.C. Frank}
+\seealso{ \code{\link{draw.oriloc}}, \code{\link{rearranged.oriloc}} }
+
+\note{ The method works only for genomes having a single origin of replication
+from which the replication is bidirectional. To detect the composition changes,
+a DNA-walk is performed. In a 2-dimensional DNA walk, a C in the sequence
+corresponds to the movement in the positive y-direction and G to a movement
+in the negative y-direction. T and A are mapped by analogous steps along the
+x-axis. When there is a strand asymmetry, this will form a trajectory that
+turns at the origin and terminus of replication. Each step is the sum of
+nucleotides in a gene in third codon positions. Then orthogonal regression is
+used to find a line through this trajectory. Each point in the trajectory will
+have a corresponding point on the line, and the coordinates of each are
+calculated. Thereafter, the distances from each of these points to the origin
+(of the plane), are calculated. These distances will represent a form of
+cumulative skew. This permits us to make a plot with the gene position (gene
+number, start or end position) on the x-axis and the cumulative skew (distance)
+on the y-axis. Depending on where the sequence starts, such a plot will display
+one or two peaks. Positive peak means origin, and negative means terminus.
+In the case of only one peak, the sequence starts at the origin or terminus
+site. }
+\examples{
+\dontrun{
+#
+# A little bit too long for routine checks because oriloc() is already
+# called in draw.oriloc.Rd documentation file. Try example(draw.oriloc)
+# instead, or copy/paste the following code:
+#
+out <- oriloc()
+plot(out$start.kb, out$skew, type = "l", xlab = "Map position in Kb",
+ ylab = "Cumulated composite skew",
+ main = expression(italic(Chlamydia~~trachomatis)~~complete~~genome))
+#
+# Example with a single GenBank file:
+#
+out2 <- oriloc(gbk="ftp://pbil.univ-lyon1.fr/pub/seqinr/data/ct.gbk")
+draw.oriloc(out2)
+#
+# (some warnings are generated because of join in features and a gene that
+# wraps around the genome)
+#
+}
+}
diff --git a/man/pK.Rd b/man/pK.Rd
new file mode 100644
index 0000000..b174638
--- /dev/null
+++ b/man/pK.Rd
@@ -0,0 +1,88 @@
+\name{pK}
+\alias{pK}
+\docType{data}
+\title{pK values for the side chain of charged amino acids from various sources}
+\description{
+This compilation of pK values is from Joanna Kiraga (2008).
+}
+\usage{data(pK)}
+\format{
+ A data frame with the seven charged amino-acids in rows and
+ six sources in columns. The rownames are the one-letter codes
+ for amino-acids.
+}
+\source{
+Table 2 in Kiraga (2008).
+}
+\references{
+Kiraga, J. (2008) Analysis and computer simulations of variability of
+isoelectric point of proteins in the proteomes. PhD thesis, University
+of Wroclaw, Poland.
+
+Bjellqvist, B., Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F., Sanchez, J.Ch.,
+Frutiger, S., Hochstrasser, D. (1993) The focusing positions of polypeptides in
+immobilized pH gradients can be predicted from their amino acid sequences.
+\emph{Electrophoresis}, \bold{14}:1023-1031.
+
+EMBOSS data were from release 5.0 and were still the same in release 6.6
+\url{http://emboss.sourceforge.net/apps/release/6.6/emboss/apps/iep.html}
+last visited 2016-06-03.
+
+Murray, R.K., Granner, D.K., Rodwell, V.W. (2006)
+\emph{Harper's illustrated Biochemistry.}
+27th edition. Published by The McGraw-Hill Companies.
+
+Sillero, A., Maldonado, A. (2006) Isoelectric point determination of proteins
+and other macromolecules: oscillating method.
+\emph{Comput Biol Med.}, \bold{36}:157-166.
+
+Solomon, T.W.G. (1998) \emph{Fundamentals of Organic Chemistry}, 5th edition.
+Published by Wiley.
+
+Stryer L. (1999) \emph{Biochemia}. czwarta edycja. Wydawnictwo Naukowe PWN.
+
+\code{citation("seqinr")}
+}
+
+\examples{
+data(pK)
+data(SEQINR.UTIL) # for N and C terminal pK values
+prot <- s2c("ACDEFGHIKLMNPQRSTVWY")
+compoAA <- table(factor(prot, levels = LETTERS))
+nTermR <- which(LETTERS == prot[1])
+cTermR <- which(LETTERS == prot[length(prot)]) # last residue of prot, not the first one
+# Net charge of the protein at each pH, for a named vector pK of side-chain pK values:
+computeCharge <- function(pH, compoAA, pK, nTermResidue, cTermResidue){
+ cter <- 10^(-SEQINR.UTIL$pk[cTermResidue,1]) /
+ (10^(-SEQINR.UTIL$pk[cTermResidue,1]) + 10^(-pH))
+ nter <- 10^(-pH) / (10^(-SEQINR.UTIL$pk[nTermResidue,2]) + 10^(-pH))
+ carg <- as.vector(compoAA['R'] * 10^(-pH) / (10^(-pK['R']) + 10^(-pH)))
+ chis <- as.vector(compoAA['H'] * 10^(-pH) / (10^(-pK['H']) + 10^(-pH)))
+ clys <- as.vector(compoAA['K'] * 10^(-pH) / (10^(-pK['K']) + 10^(-pH)))
+ casp <- as.vector(compoAA['D'] * 10^(-pK['D']) /(10^(-pK['D']) + 10^(-pH)))
+ cglu <- as.vector(compoAA['E'] * 10^(-pK['E']) / (10^(-pK['E']) + 10^(-pH)))
+ ccys <- as.vector(compoAA['C'] * 10^(-pK['C']) / (10^(-pK['C']) + 10^(-pH)))
+ ctyr <- as.vector(compoAA['Y'] * 10^(-pK['Y']) / (10^(-pK['Y']) + 10^(-pH)))
+ charge <- carg + clys + chis + nter - (casp + cglu + ctyr + ccys + cter) # positive minus negative groups
+ return(charge)
+}
+# Charge curve over the pH range, first source drawn by plot() and the others added by lines():
+pHseq <- seq(from = 0, to = 14, by = 0.1)
+Bje <- pK$Bjellqvist
+names(Bje) <- rownames(pK)
+res <- computeCharge(pHseq, compoAA, Bje, nTermR, cTermR)
+plot(pHseq, res, type = "l", ylab = "Charge", las = 1,
+ main = paste("Charge of protein\n",c2s(prot)),
+ xlab = "pH")
+for(j in 2:ncol(pK)){
+ src <- pK[,j]
+ names(src) <- rownames(pK)
+ res <- computeCharge(pHseq, compoAA, src, nTermR, cTermR)
+ lines(pHseq, res, lty = j, col = rainbow(5)[j - 1]) # column j gets the colour shown in the legend
+}
+
+abline(h=0)
+abline(v=computePI(prot))
+legend("bottomleft", inset = 0.01, colnames(pK), lty = 1:6, col = c("black", rainbow(5)))
+}
+\keyword{datasets}
diff --git a/man/parser.socket.Rd b/man/parser.socket.Rd
new file mode 100644
index 0000000..8476525
--- /dev/null
+++ b/man/parser.socket.Rd
@@ -0,0 +1,28 @@
+\name{parser.socket}
+\alias{parser.socket}
+
+\title{Utility function to parse answers from an ACNUC server}
+\description{
+Answers from the server look like: "code=0&lrank=2&count=150513&type=SQ&locus=F".
+}
+\usage{
+parser.socket(onelinefromserver, verbose = FALSE)
+}
+\arguments{
+ \item{onelinefromserver}{a string}
+ \item{verbose}{logical, if TRUE mode verbose is on}
+}
+\value{
+A vector of mode character or NULL if \code{onelinefromserver} is NULL
+or if its length is 0.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+stopifnot(all(parser.socket("code=0&lrank=2&count=150513&type=SQ&locus=F")
+ == c("0", "2", "150513", "SQ", "F"))) # only the values are returned, in order, as character
+}
+\keyword{ utilities }
diff --git a/man/peakabif.Rd b/man/peakabif.Rd
new file mode 100644
index 0000000..dddc397
--- /dev/null
+++ b/man/peakabif.Rd
@@ -0,0 +1,68 @@
+\name{peakabif}
+\alias{peakabif}
+\title{Extraction of Peak locations, Heights and Surfaces from ABIF data}
+\description{
+ Simple peak location for data imported with the \code{\link{read.abif}} function
+ using cubic spline interpolation.
+}
+\usage{
+peakabif(abifdata,
+ chanel,
+ npeak,
+ thres = 400/yscale,
+ fig = TRUE,
+ chanel.names = c(1:4,105),
+ DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ tmin = 1/tscale,
+ tmax = abifdata$Data[["SCAN.1"]]/tscale,
+ tscale = 1000,
+ yscale = 1000,
+ irange = (tmin*tscale):(tmax*tscale),
+ y = abifdata$Data[[DATA]][irange]/yscale,
+ method = "monoH.FC",
+ maxrfu = 1000,
+ ...)
+}
+\arguments{
+ \item{abifdata}{the result returned by \code{\link{read.abif}}}
+ \item{chanel}{the dye number}
+ \item{npeak}{the expected number of peaks}
+ \item{thres}{scaled threshold value}
+ \item{fig}{logical: should localized peaks be plotted}
+ \item{chanel.names}{numbers extensions used for the DATA}
+ \item{DATA}{names of the DATA components}
+ \item{tmin}{scaled starting time for the time axis}
+ \item{tmax}{scaled ending time for the time axis}
+ \item{tscale}{scale factor for the time axis}
+ \item{yscale}{scale factor for the y-axis (RFU)}
+ \item{irange}{indices of data to be plotted}
+ \item{y}{values used for the y-axis}
+ \item{method}{method to be used by \code{\link{splinefun}}}
+ \item{maxrfu}{argument passed to \code{\link{baselineabif}}}
+ \item{...}{arguments forwarded to \code{\link{plot}}}
+}
+\value{
+ Returns invisibly a list with the unscaled values for the locations of peaks,
+ heights of peaks and surfaces of peaks and baseline estimate. The peak
+ location are in datapoint units, that is an integer starting at 1 for the
+ first experimental point, 2 for the second experimental point, etc.
+ However, due to interpolation between points
+ the estimated peak location is usually not an integer.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ function \code{\link{read.abif}} to import files in ABIF format,
+ \code{\link{plotabif}} to plot them,
+ data \code{\link{gs500liz}} for internal size standards,
+ data \code{\link{identifiler}} for allele names in the allelic ladder,
+ data \code{\link{JLO}} for an example of an individual sample file,
+ data \code{\link{ECH}} for an example of an allelic ladder.
+}
+\examples{
+data(JLO) # example of an individual sample file
+JLO.maxis <- peakabif(JLO, 5, npeak = 14, tmin = 2.7, thres = 0.1)$maxis # 14 peaks expected in dye number 5
+}
+
diff --git a/man/permutation.Rd b/man/permutation.Rd
new file mode 100644
index 0000000..762f62d
--- /dev/null
+++ b/man/permutation.Rd
@@ -0,0 +1,67 @@
+\name{permutation}
+\alias{permutation}
+\title{Sequence permutation according to several different models}
+\description{
+ Generates a random permutation of a given sequence, according to a
+ given model. Available models are : \code{base}, \code{position},
+ \code{codon}, \code{syncodon}.
+}
+\usage{permutation(sequence,modele='base',frame=0,
+ replace=FALSE,prot=FALSE,numcode=1,ucoweight = NULL)}
+\arguments{
+ \item{sequence}{ A nucleic acids sequence }
+ \item{modele}{ A string of characters describing the model chosen for
+ the random generation }
+ \item{frame}{ Only active for the \code{position}, \code{codon},
+ \code{syncodon} models: starting position of CDS as in \code{splitseq} }
+ \item{replace}{ This option is not active for the \code{syncodon}
+ model: if \code{TRUE}, sampling is done with replacement }
+ \item{prot}{ Only available for the \code{codon} model: if \code{TRUE}, the first and last codons are preserved, and only internal codons are shuffled }
+ \item{numcode}{ Only available for the \code{syncodon} model: the
+ genetic code number as in \code{translate}. }
+ \item{ucoweight}{ A list of weights containing the desired codon usage
+ bias as generated by \code{ucoweight}. If none is specified, the
+ codon usage of the given sequence is used. }
+}
+\value{
+ a sequence generated from the original one by a given model
+}
+\details{
+ The \code{base} model allows for random sequence generation by
+ shuffling (with/without replacement) of all bases in the sequence.
+
+ The \code{position} model allows for random sequence generation
+ by shuffling (with/without replacement) of bases within their
+ position in the codon (bases in position I, II or III stay in
+ position I, II or III in the new sequence).
+
+ The \code{codon} model allows for random sequence generation by
+ shuffling (with/without replacement) of codons.
+
+ The \code{syncodon} model allows for random sequence generation
+ by shuffling (with/without replacement) of synonymous codons.
+ }
+\references{
+ \code{citation("seqinr")}
+}
+\author{L. Palmeira}
+\seealso{ \code{\link{synsequence}} }
+\examples{
+ data(ec999)
+ cds <- ec999[[1]]
+ # base model: the single-base counts must be unchanged
+ shuffled <- permutation(cds, modele = "base")
+ isTRUE(all.equal(count(shuffled, 1), count(cds, 1)))
+ # position model: the GC, GC2 and GC3 values must be unchanged
+ shuffled <- permutation(cds, modele = "position")
+ isTRUE(all.equal(GC(shuffled), GC(cds)))
+ isTRUE(all.equal(GC2(shuffled), GC2(cds)))
+ isTRUE(all.equal(GC3(shuffled), GC3(cds)))
+ # codon model: the codon usage must be unchanged
+ shuffled <- permutation(cds, modele = "codon")
+ isTRUE(all.equal(uco(shuffled), uco(cds)))
+ # syncodon model: the translation must be unchanged
+ shuffled <- permutation(cds, modele = "syncodon", numcode = 1)
+ isTRUE(all.equal(translate(shuffled), translate(cds)))
+}
+\keyword{ utilities }
diff --git a/man/phylip.Rd b/man/phylip.Rd
new file mode 100644
index 0000000..d90c1af
--- /dev/null
+++ b/man/phylip.Rd
@@ -0,0 +1,10 @@
+\name{phylip}
+ \docType{data}
+ \alias{phylip}
+ \title{Example of results obtained after a call to read.alignment}
+ \description{This data set gives an example of an amino acid alignment obtained after a call to the function read.alignment on an alignment file in "phylip" format.}
+ \usage{phylip}
+ \format{A List of class alignment}
+ \source{http://evolution.genetics.washington.edu/phylip.html}
+ \references{http://evolution.genetics.washington.edu/phylip.html}
+ \keyword{datasets}
diff --git a/man/plot.SeqAcnucWeb.Rd b/man/plot.SeqAcnucWeb.Rd
new file mode 100644
index 0000000..c08bf84
--- /dev/null
+++ b/man/plot.SeqAcnucWeb.Rd
@@ -0,0 +1,35 @@
+\name{plot.SeqAcnucWeb}
+\alias{plot.SeqAcnucWeb}
+\title{To Plot Subsequences on the Parent Sequence}
+\description{
+ This function plots all the types of subsequences on a parent sequence. Subsequences are represented by colored rectangles on the parent sequence. For example, types could be CDS, TRNA, RRNA .... In order to get all the types that are available for the selected database, use \code{getType}.
+}
+\usage{
+\method{plot}{SeqAcnucWeb}(x, types = getType()$sname, socket = autosocket(), ...)
+}
+\arguments{
+ \item{x}{A sequence of class \code{SeqAcnucWeb}}
+ \item{types}{ The type of subsequences to plot. Default value is to consider
+ all possible subsequence types.}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{\dots}{not currently used}
+}
+\value{
+ An invisible list giving, for each subsequence, its position on the parent sequence.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{\code{\link{getType}}, \code{\link{query}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP") # open the bank that is queried below
+ mylist <- query("mylist", "AC=AB078009") # select by accession number
+ plot(mylist$req[[1]]) # plot the first sequence of the list with its subsequences
+ }
+}
+\keyword{ hplot }
diff --git a/man/plotPanels.Rd b/man/plotPanels.Rd
new file mode 100644
index 0000000..83556f5
--- /dev/null
+++ b/man/plotPanels.Rd
@@ -0,0 +1,26 @@
+\name{plotPanels}
+\alias{plotPanels}
+\title{Representation of Amplicon Size Ranges of a STR kit.}
+\description{Plot amplicon size ranges grouped by dye color.}
+\usage{plotPanels(kitname, data, xlim = NULL, cex = 0.75, alpha = 0.5)}
+\arguments{
+ \item{kitname}{string of characters for the kit name.}
+ \item{data}{an output from the \code{\link{readPanels}} function.}
+ \item{xlim}{x-axis range.}
+ \item{cex}{character expansion factor.}
+ \item{alpha}{alpha transparency channel for colors.}
+}
+\value{none}
+\author{J.R. Lobry}
+
+\seealso{ \code{\link{readPanels}}.}
+
+\examples{
+path1 <- system.file("abif/AmpFLSTR_Panels_v1.txt", package = "seqinr") # panel file shipped with seqinr
+res1 <- readPanels(path1)
+# Two kits on the same page; the previous layout is saved so that it can be restored
+opar <- par(mfrow = c(2,1))
+plotPanels("Identifiler_v1", res1)
+plotPanels("SEfiler_v1", res1)
+par(opar)
+}
diff --git a/man/plotabif.Rd b/man/plotabif.Rd
new file mode 100644
index 0000000..6549d25
--- /dev/null
+++ b/man/plotabif.Rd
@@ -0,0 +1,75 @@
+\name{plotabif}
+\alias{plotabif}
+\title{Electrophoregram plot for ABIF data}
+\description{
+ Simple chromatogram plot for data imported with the \code{\link{read.abif}} function.
+}
+\usage{
+plotabif(abifdata,
+ chanel = 1,
+ tmin = 1/tscale,
+ tmax = abifdata$Data[["SCAN.1"]]/tscale,
+ tscale = 1000,
+ yscale = 1000, type = "l", las = 1,
+ xlab = paste("Time", tscale, sep = "/"),
+ ylab = paste("RFU", yscale, sep = "/"),
+ irange = (tmin*tscale):(tmax*tscale),
+ x = irange/tscale,
+ xlim = c(tmin, tmax),
+ chanel.names = c(1:4,105),
+ DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ y = abifdata$Data[[DATA]][irange]/yscale,
+ ylim = c(min(y), max(y)),
+ dyn = abifdata$Data[[paste("DyeN", chanel, sep = ".")]],
+ main = paste(deparse(substitute(abifdata)), chanel, dyn, sep = " ; "),
+ calibr = NULL,
+ ladder.bp = NULL,
+ allele.names = "identifiler",
+ ladder.lab = TRUE,
+ ...)
+}
+\arguments{
+ \item{abifdata}{the result returned by \code{\link{read.abif}}}
+ \item{chanel}{the dye number}
+ \item{tmin}{scaled starting time for the time axis}
+ \item{tmax}{scaled ending time for the time axis}
+ \item{tscale}{scale factor for the time axis}
+ \item{yscale}{scale factor for the y-axis (RFU)}
+ \item{type}{type of line drawing forwarded to \code{\link{plot}}}
+ \item{las}{orientation of axis labels forwarded to \code{\link{plot}}}
+ \item{xlab}{x-axis label forwarded to \code{\link{plot}}}
+ \item{ylab}{y-axis label forwarded to \code{\link{plot}}}
+ \item{irange}{indices of data to be plotted}
+ \item{x}{values used for the x-axis}
+ \item{xlim}{limits for the x-axis forwarded to \code{\link{plot}}}
+ \item{chanel.names}{numbers extensions used for the DATA}
+ \item{DATA}{names of the DATA components}
+ \item{y}{values used for the y-axis}
+ \item{ylim}{limits for the y-axis forwarded to \code{\link{plot}}}
+ \item{dyn}{dye name}
+ \item{main}{title for the plot forwarded to \code{\link{plot}}}
+ \item{calibr}{an optional calibration function to convert time into bp}
+ \item{ladder.bp}{an optional ladder scale in bp (calibr must be provided)}
+ \item{allele.names}{name of the dataset with allele names}
+ \item{ladder.lab}{logical: should allele names be added on plot}
+ \item{...}{arguments forwarded to \code{\link{plot}}}
+}
+\value{
+ Returns invisibly its local graphical parameter settings.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ function \code{\link{read.abif}} to import files in ABIF format,
+ data \code{\link{gs500liz}} for internal size standards,
+ data \code{\link{identifiler}} for allele names in the allelic ladder,
+ data \code{\link{JLO}} for an example of an individual sample file,
+ data \code{\link{ECH}} for an example of an allelic ladder.
+}
+\examples{
+data(ECH) # example of an allelic ladder
+plotabif(ECH,chanel = 1, tmin = 3.2, tmax = 6.1) # first dye, scaled time axis from 3.2 to 6.1
+}
+
diff --git a/man/plotladder.Rd b/man/plotladder.Rd
new file mode 100644
index 0000000..d60dcb2
--- /dev/null
+++ b/man/plotladder.Rd
@@ -0,0 +1,59 @@
+\name{plotladder}
+\alias{plotladder}
+\title{Simple plot of an allelic ladder from ABIF data}
+\description{
+ Simple representation of an observed allelic ladder.
+}
+\usage{
+plotladder(abifdata, chanel, calibr, allele.names = "identifiler", npeak = NULL, ...)
+}
+\arguments{
+ \item{abifdata}{the result returned by \code{\link{read.abif}}}
+ \item{chanel}{the dye number}
+ \item{calibr}{a mandatory calibration function to convert time into bp}
+ \item{allele.names}{name of the dataset which contains allele names as in \code{\link{identifiler}}}
+ \item{npeak}{expected number of peaks, deduced from \code{allele.names} by default}
+ \item{...}{arguments forwarded to \code{\link{peakabif}}}
+}
+\value{
+ Returns invisibly the location of peaks in bp.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ function \code{\link{read.abif}} to import files in ABIF format,
+ \code{\link{plotabif}} to plot them,
+ data \code{\link{gs500liz}} for internal size standards,
+ data \code{\link{identifiler}} for allele names in the allelic ladder,
+ data \code{\link{JLO}} for an example of an individual sample file,
+ data \code{\link{ECH}} for an example of an allelic ladder.
+}
+\examples{
+ #
+ # load an example of allelic ladder results from an ABIF (*.fsa) file:
+ #
+data(ECH)
+ #
+ # Extract from internal size standard channel number 5 the location
+ # of 14 peaks:
+ #
+ECH.maxis <- peakabif(ECH, 5, npeak = 14, tmin = 2.7, thres = 0.1, fig = FALSE)$maxis
+ #
+ # Load data about the expected size of peaks in bp for calibration:
+ #
+data(gs500liz)
+lizbp <- gs500liz$liz # All peak sizes in bp
+lizbp[!gs500liz$mask1 | !gs500liz$mask2] <- NA # Mark useless peaks
+lizbp <- lizbp[-c(1,2)] # The first two peaks are not extracted from ECH
+ECH.calibr <- splinefun(ECH.maxis[!is.na(lizbp)], lizbp[!is.na(lizbp)]) # maps peak location to bp, useful peaks only
+ #
+ # Show the allelic ladder for the 4 dyes:
+ #
+plotladder(ECH, 1, ECH.calibr, tmin = 3.1, thres = 0.3, fig = FALSE)
+plotladder(ECH, 2, ECH.calibr, tmin = 3.1, thres = 0.35, fig = FALSE)
+plotladder(ECH, 3, ECH.calibr, tmin = 3.1, thres = 0.2, fig = FALSE)
+plotladder(ECH, 4, ECH.calibr, tmin = 3.1, thres = 0.2, fig = FALSE)
+}
+
diff --git a/man/pmw.Rd b/man/pmw.Rd
new file mode 100644
index 0000000..1bc5318
--- /dev/null
+++ b/man/pmw.Rd
@@ -0,0 +1,80 @@
+\name{pmw}
+\alias{pmw}
+\title{ Protein Molecular Weight }
+\description{
+ With default parameter values, returns the apparent molecular weight of one mole (6.0221415 e+23)
+ of the input protein expressed in gram at sea level on Earth with terrestrial
+ isotopic composition.
+}
+\usage{
+pmw(seqaa, Ar = c(C = 12.0107, H = 1.00794, O = 15.9994,
+N = 14.0067, P = 30.973762, S = 32.065), gravity = 9.81,
+unit = "gram", checkseqaa = TRUE)
+}
+\arguments{
+ \item{seqaa}{ a protein sequence as a vector of single chars. Allowed values are
+ "*ACDEFGHIKLMNPQRSTVWY", non allowed values are ignored. }
+ \item{Ar}{ a named vector for the mean relative atomic masses of CHONPS atoms. Default
+ values are from the natural terrestrial sources according to the 43rd IUPAC
+ General Assembly in Beijing, China in August 2005 (See
+ \url{http://iupac.org/category/recent-releases/}
+ for updates).}
+ \item{gravity}{ gravitational field constant in standard units. Defaults to 9.81 m/s2, that
+ is to the average value at sea level on Earth. Negative values are not allowed. }
+ \item{unit}{ a string that could be "gram" to get the result in grams (1 g = 0.001 kg)
+ or "N" to get the result in Newton units (1 N = 1 kg.m/s2). }
+ \item{checkseqaa}{ if TRUE \code{pmw()} warns if a non-allowed character in seqaa is found.}
+}
+\details{
+\describe{
+\item{Algorithm}{Computing the molecular mass of a protein is close to a linear form on
+ amino-acid frequencies, but not exactly since we have to remove n - 1 water
+ molecules for peptide bond formation.}
+\item{Cysteine}{All cysteines are supposed to be in reduced (-SH) form.}
+\item{Methionine}{All methionines are supposed to be not oxidized.}
+\item{Modifications}{No post-translational modifications (such as phosphorylations)
+are taken into account.}
+\item{Rare}{Rare amino-acids (pyrolysine and selenocysteine) are not handled.}
+\item{Warning}{Do not use default values for Ar to compute the molecular mass
+of alien's proteins: the isotopic composition for CHONPS atoms could be different
+from terrestrial data in a xenobiotic context. Some aliens are easily offended, make
+sure not to initiate one more galactic war by reporting wrong results. }
+}
+}
+\value{
+ The protein molecular weight as a single numeric value.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{s2c}}, \code{\link{c2s}}, \code{\link{aaa}}, \code{\link{a}} }
+
+\examples{
+allowed <- s2c("*ACDEFGHIKLMNPQRSTVWY") # All allowed chars in a protein
+pmw(allowed)
+all.equal(pmw(allowed), 2395.71366) # Should be true on most platforms
+#
+# Compute the apparent molecular weight on Moon surface:
+#
+pmw(allowed, g = 1.6)
+#
+# Compute the apparent molecular weight in absence of gravity:
+#
+pmw(allowed, g = 0) # should be zero
+#
+# Reports results in Newton units:
+#
+pmw(allowed, unit = "N")
+#
+# Compute the mass in kg of one mol of this protein:
+#
+pmw(allowed)/10^3
+#
+# Compute the mass for all amino-acids:
+#
+sapply(allowed[-1], pmw) -> aamw
+names(aamw) <- aaa(names(aamw))
+aamw
+}
+\keyword{ utilities }
diff --git a/man/prepgatannots.Rd b/man/prepgatannots.Rd
new file mode 100644
index 0000000..f07e9c0
--- /dev/null
+++ b/man/prepgatannots.Rd
@@ -0,0 +1,73 @@
+\name{prepgetannots}
+\alias{prepgetannots}
+\alias{pga}
+\title{Select annotation lines in an ACNUC database}
+\description{
+This function is called before using \code{\link{getAnnot}} or
+\code{\link{modifylist}} with a \code{scan} type operation to
+select the annotation lines to be returned or scanned.
+}
+
+\usage{
+prepgetannots(what = "all", setfor = c("scan", "getannots"),
+ socket = autosocket(), verbose = FALSE)
+pga(what = "all", setfor = c("scan", "getannots"),
+ socket = autosocket(), verbose = FALSE)
+}
+
+\arguments{
+ \item{what}{the default "all" means that all annotation lines are selected.
+ This can be more specific, see details.}
+ \item{setfor}{this is used when \code{what} has its default "all" value.
+ The behaviour is different for \code{\link{getAnnot}} and
+ \code{\link{modifylist}} with a \code{scan} type operation:
+ annotations but not sequences are scanned, but sequences can
+ be returned by \code{\link{getAnnot}}. The default value is "scan".}
+ \item{socket}{an object of class \code{sockconn} connecting to an ACNUC server}
+ \item{verbose}{logical, if TRUE mode verbose is on}
+}
+
+\details{
+The names of annotation lines in the opened ACNUC database are
+returned by \code{\link{countfreelists}}, they are forced to upper
+case letters by \code{prepgetannots} when supplied with the
+\code{what} argument.
+
+For the EMBL/SWISSPROT format, keys are: ALL, AC, DT, KW, OS, OC, OG, OH,
+RN, RC, RP, RX, RA, RG, RT, RL, DR, AH, AS, CC, FH, FT, SQ, SEQ.
+
+For GenBank: ALL, ACCESSION, VERSION, KEYWORDS, SOURCE, ORGANISM,
+REFERENCE, AUTHORS, CONSRTM, TITLE, JOURNAL, PUBMED, REMARK, COMMENT,
+FEATURES, ORIGIN, SEQUENCE.
+
+For FT (embl, swissprot) and FEATURES (GenBank), one or more specific
+feature keys can be specified using lines with only uppercase letters, such as
+
+FEATURES|CDS
+FT|TRNA
+
+Keys ALL and SEQ/SEQUENCE stand for all annotation and sequence lines, respectively.
+For the scan operation, key ALL stands for the DE/DEFINITION lines,
+and SEQ/SEQUENCE cannot be used (annotations but not sequence are scanned).}
+
+\value{
+The function returns invisibly the annotation lines names.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{
+ \code{\link{getAnnot}}, \code{\link{modifylist}}, \code{\link{countfreelists}}
+}
+
+\examples{
+ \dontrun{# Need internet connection
+ choosebank("genbank")
+ mylist <- query("mylist","n=AQF16SRRN")
+ pga() # We want to scan all annotations, including FEATURES
+ mylist <- modifylist("mylist", operation = "strain", type = "scan")
+ mylist$nelem # should be 1
+ }
+}
+\keyword{utilities}
diff --git a/man/prettyseq.Rd b/man/prettyseq.Rd
new file mode 100644
index 0000000..8bb8ebf
--- /dev/null
+++ b/man/prettyseq.Rd
@@ -0,0 +1,35 @@
+\name{prettyseq}
+\alias{prettyseq}
+\title{Text representation of a sequence from an ACNUC server}
+\description{
+To get a text representation of sequence of rank \code{num} and of its subsequences,
+with \code{bpl} bases per line (default = 60), and with optional translation of
+protein-coding subsequences
+}
+\usage{
+prettyseq(num, bpl = 60, translate = TRUE, socket = autosocket())
+}
+\arguments{
+ \item{num}{rank of the sequence in the ACNUC database}
+ \item{bpl}{number of bases per line}
+ \item{translate}{should coding sequences be translated?}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+\value{
+An invisible vector of strings. The output is redirected to the console.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ prettyseq(111)
+ }
+}
+\keyword{ utilities }
diff --git a/man/print.SeqAcnucWeb.Rd b/man/print.SeqAcnucWeb.Rd
new file mode 100644
index 0000000..0518a90
--- /dev/null
+++ b/man/print.SeqAcnucWeb.Rd
@@ -0,0 +1,32 @@
+\name{print.SeqAcnucWeb}
+\alias{print.SeqAcnucWeb}
+\title{Print method for objects from class SeqAcnucWeb}
+\description{
+ Print the name, length, frame and genetic code number.
+}
+\usage{
+\method{print}{SeqAcnucWeb}(x, ...)
+}
+\arguments{
+ \item{x}{A sequence of class \code{SeqAcnucWeb}}
+ \item{\dots}{Arguments passed to \code{print}}
+}
+\value{
+ None.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{print}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "sp=felis catus")
+ mylist$req[[1]]
+ # name length frame ncbicg
+ # "A06937" "34" "0" "1"
+ }
+}
+\keyword{ manip }
diff --git a/man/print.qaw.Rd b/man/print.qaw.Rd
new file mode 100644
index 0000000..c11306c
--- /dev/null
+++ b/man/print.qaw.Rd
@@ -0,0 +1,30 @@
+\name{print.qaw}
+\alias{print.qaw}
+\title{Print method for objects from class qaw}
+\description{
+ Print the number of elements, their type and the corresponding query.
+}
+\usage{
+\method{print}{qaw}(x, ...)
+}
+\arguments{
+ \item{x}{An object of class \code{qaw}}
+ \item{\dots}{not used}
+}
+\value{
+ None.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{print}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ list1 <- query("sp=felis catus")
+ list1
+ # 4732 SQ for sp=felis catus
+ }
+}
diff --git a/man/prochlo.Rd b/man/prochlo.Rd
new file mode 100644
index 0000000..75c26a3
--- /dev/null
+++ b/man/prochlo.Rd
@@ -0,0 +1,119 @@
+\name{prochlo}
+\alias{prochlo}
+\docType{data}
+\title{ Zscore on three strains of Prochlorococcus marinus }
+\encoding{latin1}
+\description{
+This dataset contains the zscores computed with the codon model on all
+CDS from 3 strains of Prochlorococcus marinus (as retrieved from Genome
+Reviews database on June 16, 2005)
+}
+\usage{
+data(prochlo)
+}
+\format{
+ List of three dataframes of the zscore of each of the 16 dinucleotides
+ on each CDS retrieved from the specific strain.
+
+\describe{
+ \item{BX548174}{strain adapted to living at a depth of 5
+ meters (high levels of UV exposure)
+ \code{base} model on each intergenic sequence}
+ \item{AE017126}{strain adapted to living at a depth of 120
+ meters (low levels of UV exposure)}
+ \item{BX548175}{strain adapted to living at a depth of 135
+ meters (low levels of UV exposure)}
+}
+}
+\references{
+ Palmeira, L., Guéguen, L. and Lobry JR. (2006) UV-targeted dinucleotides
+ are not depleted in light-exposed Prokaryotic genomes.
+ \emph{Molecular Biology and Evolution},
+ \bold{23}:2214-2219.\cr
+ \url{http://mbe.oxfordjournals.org/cgi/reprint/23/11/2214}\cr\cr
+
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{zscore}} }
+\examples{
+#
+# Show the four YpY for the three ecotypes:
+#
+data(prochlo)
+oneplot <- function(x){
+ plot(density(prochlo$BX548174[, x]),
+ ylim = c(0,0.4), xlim = c(-4,4), lty=3,
+ main = paste(substr(x,1,1), "p", substr(x,2,2), " bias", sep = ""),
+ xlab="",ylab="",las=1, type = "n")
+ rect(-10,-1,-1.96,10, col = "yellow", border = "yellow")
+ rect(1.96,-1,10,10, col = "yellow", border = "yellow")
+ lines(density(prochlo$BX548174[, x]),lty=3)
+ lines(density(prochlo$AE017126[, x]),lty=2)
+ lines(density(prochlo$BX548175[, x]),lty=1)
+ abline(v=c(-1.96,1.96),lty=5)
+ box()
+}
+par(mfrow=c(2,2),mar=c(2,3,2,0.5) + 0.1)
+oneplot("CT")
+oneplot("TC")
+oneplot("CC")
+oneplot("TT")
+#
+# Show YpY biases with respect to light exposure
+#
+curdev <- getOption("device")
+OK <- FALSE
+devlist <- c("X11", "windows", "quartz") # interactive with width and height in inches
+for(i in devlist){
+ if(exists(i) && identical(get(i), curdev)){
+ OK <- TRUE
+ break
+ }
+}
+if(OK){
+ curdev(width = 18, height = 11)
+ par(oma = c(0, 0, 3, 0), mfrow = c(1, 2), mar = c(5, 4, 0, 0), cex = 1.5)
+ example(waterabs, ask = FALSE) #left figure
+
+ par(mar = c(5, 0, 0, 2))
+ plot(seq(-5, 3, by = 1), seq(0, 150, length = 9), col = "white",
+ ann = FALSE, axes = FALSE, xaxs = "i", yaxs = "i")
+ axis(1, at = c(-1.96, 0, 1.96), labels = c(-1.96, 0, 1.96))
+ lines(rep(-1.96, 2),c(0, 150),lty=2)
+ lines(rep(1.96, 2), c(0, 150),lty=2)
+ title(xlab = "zscore distribution", cex = 1.5, adj = 0.65)
+
+ selcol <- c(6, 8, 14, 16)
+ z5 <- prochlo$BX548174[, selcol]
+ z120 <- prochlo$AE017126[, selcol]
+ z135 <- prochlo$BX548175[, selcol]
+
+ todo <- function(who, xx, col = "black", bottom, loupe){
+ dst <- density(who[, xx])
+ sel <- which(dst$x >= -3)
+ lines(dst$x[sel], dst$y[sel]*loupe + (bottom), col = col)
+ }
+ todo2 <- function(who, bottom, loupe){
+ todo(who, "CC", "blue", bottom, loupe)
+ todo(who, "CT", "red", bottom, loupe)
+ todo(who, "TC", "green", bottom, loupe)
+ todo(who, "TT", "black", bottom, loupe)
+ }
+ todo3 <- function(bottom, who, leg, loupe = 90){
+ lines(c(-5,-3), c(150 - leg, bottom + 20))
+ rect(-3,bottom,3,bottom+40)
+ text(-2.6,bottom+38, paste(leg, "m"))
+ todo2(who, bottom, loupe)
+ }
+
+ todo3(bottom = 110, who = z5, leg = 5)
+ todo3(bottom = 50, who = z120, leg = 120)
+ todo3(bottom = 5, who = z135, leg = 135)
+
+ legend(-4.5,110,c('CpC','CpT','TpC','TpT'),lty=1,pt.cex=cex,
+ col=c('blue','red','green','black'))
+
+ mtext(expression(paste("Dinucleotide composition for three ",
+ italic("Prochlorococcus marinus")," ecotypes")), outer = TRUE, cex = 2, line = 1)
+ }
+}
diff --git a/man/query.Rd b/man/query.Rd
new file mode 100644
index 0000000..219dbc4
--- /dev/null
+++ b/man/query.Rd
@@ -0,0 +1,116 @@
+\name{query}
+\alias{query}
+\title{To get a list of sequence names from an ACNUC data base located on the web}
+\description{
+This is a major command of the package. It executes all sequence retrievals using any selection criteria the data base allows. The sequences are coming from ACNUC data base located on the web and they are transferred by socket. The command produces the list of all sequence names that fit the required criteria. The sequence names belong to the class of sequence \code{SeqAcnucWeb}.
+}
+\usage{
+query(listname, query, socket = autosocket(),
+invisible = TRUE, verbose = FALSE, virtual = FALSE)
+}
+\arguments{
+ \item{listname}{The name of the list as a quoted string of chars}
+ \item{query}{A quoted string of chars containing the request with the syntax given in the details section}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{invisible}{if \code{FALSE}, the result is returned visibly.}
+ \item{verbose}{if \code{TRUE}, verbose mode is on}
+ \item{virtual}{if \code{TRUE}, no attempt is made to retrieve the information about
+ all the elements of the list. In this case, the \code{req} component of the list is set to
+ \code{NA}.}
+}
+\details{
+The query language defines several selection criteria and operations between
+lists of elements matching criteria. It creates mainly lists of sequences, but
+also lists of species (or, more generally, taxa) and of keywords.
+See \url{http://doua.prabi.fr/databases/acnuc/cfonctions.html#QUERYLANGUAGE}
+for the last update of the description of the query language.\cr
+
+Selection criteria (no space before the = sign) are:
+\describe{
+ \item{SP=taxon}{seqs attached to taxon or any other below in tree; @ wildcard possible}
+ \item{TID=id}{seqs attached to given numerical NCBI's taxon id}
+ \item{K=keyword}{seqs attached to keyword or any other below in tree; @ wildcard possible}
+ \item{T=type}{seqs of specified type}
+ \item{J=journalname}{seqs published in journal specified using defined journal code}
+ \item{R=refcode}{seqs from reference specified such as in jcode/volume/page (e.g., JMB/13/5432)}
+ \item{AU=name}{seqs from references having specified author (only last name, no initial)}
+ \item{AC=accessionno}{seqs attached to specified accession number}
+ \item{N=seqname}{seqs of given name (ID or LOCUS); @ wildcard possible}
+ \item{Y=year}{seqs published in specified year; > and < can be used instead of =}
+ \item{O=organelle}{seqs from specified organelle named following defined code (e.g., chloroplast)}
+ \item{M=molecule}{seqs from specified molecule as named in ID or LOCUS annotation records}
+ \item{ST=status}{seqs from specified data class (EMBL) or review level (UniProt)}
+ \item{F=filename}{seqs whose names are in given file, one name per line (unimplemented use \code{\link{clfcd}} instead)}
+ \item{FA=filename}{seqs attached to accession numbers in given file, one number per line (unimplemented use \code{\link{clfcd}} instead)}
+ \item{FK=filename}{produces the list of keywords named in given file, one keyword per line (unimplemented use \code{\link{clfcd}} instead)}
+ \item{FS=filename}{produces the list of species named in given file, one species per line (unimplemented use \code{\link{clfcd}} instead)}
+ \item{listname}{the named list that must have been previously constructed}
+ }
+Operators (always followed and preceded by blanks or parentheses) are:
+\describe{
+ \item{AND}{intersection of the 2 list operands}
+ \item{OR}{union of the 2 list operands}
+ \item{NOT}{complementation of the single list operand}
+ \item{PAR}{compute the list of parent seqs of members of the single list operand}
+ \item{SUB}{add subsequences of members of the single list operand}
+ \item{PS}{project to species: list of species attached to member sequences of the operand list}
+ \item{PK}{project to keywords: list of keywords attached to member sequences of the operand list}
+ \item{UN}{unproject: list of seqs attached to members of the species or keywords list operand}
+ \item{SD}{compute the list of species placed in the tree below the members of the species list operand}
+ \item{KD}{compute the list of keywords placed in the tree below the members of the keywords list operand}
+ }
+The query language is case insensitive. Three operators (AND, OR, NOT)
+can be ambiguous because they can also occur within valid criterion values.
+Such ambiguities can be solved by encapsulating elementary selection
+criteria between escaped double quotes.
+}
+
+\value{
+The result is directly assigned to the object \code{listname} in the user workspace.
+This is an object of class \code{qaw}, a list with the following 6 components:
+
+ \item{call}{the original call}
+ \item{name}{the ACNUC list name}
+ \item{nelem}{the number of elements (for instance sequences) in the ACNUC list}
+ \item{typelist}{the type of the elements of the list. Could be SQ for a list of
+ sequence names, KW for a list of keywords, SP for a list of species names.}
+ \item{req}{a list of sequence names that fit the required criteria or \code{NA} when
+ called with parameter \code{virtual} is \code{TRUE}}
+ \item{socket}{the socket connection that was used}
+}
+\references{
+To get the release date and content of all the databases located at the pbil, please look at
+the following url: \url{http://doua.prabi.fr/search/releases}\cr
+Gouy, M., Milleret, F., Mugnier, C., Jacobzone, M., Gautier,C. (1984) ACNUC: a nucleic acid sequence data base and analysis system.
+\emph{Nucl. Acids Res.}, \bold{12}:121-127.\cr
+Gouy, M., Gautier, C., Attimonelli, M., Lanave, C., Di Paola, G. (1985)
+ACNUC - a portable retrieval system for nucleic acid sequence databases:
+logical and physical designs and usage.
+\emph{Comput. Appl. Biosci.}, \bold{3}:167-172.\cr
+Gouy, M., Gautier, C., Milleret, F. (1985) System analysis and nucleic acid sequence banks.
+\emph{Biochimie}, \bold{67}:433-436.\cr
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry, D. Charif}
+\note{Most of the documentation was imported from ACNUC help
+files written by Manolo Gouy}
+\seealso{
+ \code{\link{choosebank}},
+ \code{\link{getSequence}},
+ \code{\link{getName}},
+ \code{\link{crelistfromclientdata}}
+}
+\examples{
+ \dontrun{
+ # Need internet connection
+ choosebank("genbank")
+ bb <- query("bb", "sp=Borrelia burgdorferi")
+ # To get the names of the 4 first sequences:
+ sapply(bb$req[1:4], getName)
+ # To get the 4 first sequences:
+ sapply(bb$req[1:4], getSequence, as.string = TRUE)
+ }
+}
+\keyword{utilities}
diff --git a/man/read.abif.Rd b/man/read.abif.Rd
new file mode 100644
index 0000000..fb23328
--- /dev/null
+++ b/man/read.abif.Rd
@@ -0,0 +1,80 @@
+\name{read.abif}
+\alias{read.abif}
+\title{Read ABIF formatted files}
+\description{
+ABIF stands for Applied Biosystem Inc. Format, a binary format modeled after TIFF format.
+Corresponding files usually have an \code{*.ab1} or \code{*.fsa} extension.
+}
+\usage{
+read.abif(filename, max.bytes.in.file = file.info(filename)$size,
+ pied.de.pilote = 1.2, verbose = FALSE)
+}
+\arguments{
+ \item{filename}{The name of the file.}
+ \item{max.bytes.in.file}{The size in bytes of the file, defaulting to what is returned by \code{\link{file.info}}}
+ \item{pied.de.pilote}{Safety factor: the argument \code{n} to \code{\link{readBin}} is set as \code{pied.de.pilote*max.bytes.in.file}.}
+ \item{verbose}{logical [FALSE]. If TRUE verbose mode is on.}
+}
+\details{
+All data are imported into memory, there is no attempt to read items on the fly.
+}
+\value{
+A list with three components: \code{Header} which is a list that contains various low-level information,
+among which \code{numelements} is the number of elements in the directory and \code{dataoffset}
+the offset to find the location of the directory. \code{Directory} is a data.frame for the directory
+of the file with the number of rows being the number of elements in the directory and the 7
+columns describing various low-level information about the elements. \code{Data} is a list
+with the number of components equal to the number of elements in the directory.
+}
+\references{
+\code{citation("seqinr")}
+
+Anonymous (2006) Applied Biosystem Genetic Analysis Data File Format.
+Available at \url{http://www.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf}.
+Last visited on 03-NOV-2008.
+
+The figure in the example section is an attempt to reproduce figure 1A from:
+
+Krawczyk, J., Goesmann, A., Nolte, R., Werber, M., Weisshaar, B. (2009)
+Trace2PS and FSA2PS: two software toolkits for converting trace and fsa files to PostScript format.
+\emph{Source Code for Biology and Medicine}, \bold{4}:4.
+
+}
+\author{J.R. Lobry}
+
+\seealso{
+\code{\link{readBin}} which is used here to import the binary file and \code{\link{file.info}} to
+get the size of the file. See \code{\link{JLO}} for the files used in quality check.
+}
+
+\examples{
+#
+# Quality check:
+#
+
+data(JLO)
+JLO.check <- read.abif(system.file("abif/2_FAC321_0000205983_B02_004.fsa",
+ package = "seqinr"))
+stopifnot(identical(JLO, JLO.check))
+
+#
+# Try to reproduce figure 1A from Krawczyk et al. 2009:
+#
+
+Krawczyk <- read.abif(system.file("abif/samplefsa2ps.fsa",
+ package = "seqinr"))$Data
+x <- 1:length(Krawczyk[["DATA.1"]])
+par(mar = c(2,4,2,0)+0.1, cex = 0.5)
+plot(x, Krawczyk[["DATA.1"]], type = "l", col = "blue",
+ ylab = "", xlab = "",
+ ylim = c(-2000, 10000), cex = 0.5,
+ main = "Figure 1A from Krawczyk et al. 2009",
+ xaxs = "i", yaxs = "i",
+ xaxt = "n", yaxt = "n")
+axis(1, at = seq(2000, 24000, by = 2000))
+axis(2, at = seq(-1000, 10000, by = 1000), las = 1)
+lines(x, Krawczyk[["DATA.2"]], col = "green")
+lines(x, Krawczyk[["DATA.3"]], col = "black")
+lines(x, Krawczyk[["DATA.4"]], col = "red")
+
+}
diff --git a/man/read.alignment.Rd b/man/read.alignment.Rd
new file mode 100644
index 0000000..5ebf383
--- /dev/null
+++ b/man/read.alignment.Rd
@@ -0,0 +1,116 @@
+\name{read.alignment}
+\alias{read.alignment}
+\title{Read aligned sequence files in mase, clustal, phylip, fasta or msf format}
+\description{
+ Read a file in \code{mase}, \code{clustal}, \code{phylip}, \code{fasta} or \code{msf} format.
+ These formats are used to store nucleotide or protein multiple alignments.
+}
+\usage{
+read.alignment(file, format, forceToLower = TRUE)
+}
+\arguments{
+ \item{file}{the name of the file which the aligned sequences are to be read from.
+ If it does not contain an absolute or relative path, the file name is relative
+ to the current working directory, \code{\link{getwd}}. }
+ \item{format}{a character string specifying the format of the file : \code{mase},
+ \code{clustal}, \code{phylip}, \code{fasta} or \code{msf} }
+ \item{forceToLower}{a logical defaulting to TRUE stating whether the returned
+ characters in the sequence should be in lower case (introduced in seqinR
+ release 1.1-3).}
+}
+\details{
+ \describe{
+ \item{"mase"}{The mase format is used to store nucleotide or protein
+ multiple alignments. The beginning of the file must contain a header
+ containing at least one line (but the content of this header may be
+ empty). The header lines must begin by \code{;;}. The body of the
+ file has the following structure: First, each entry must begin by
+ one (or more) commentary line. Commentary lines begin by the character
+ \code{;}. Again, this commentary line may be empty. After the
+ commentaries, the name of the sequence is written on a separate
+ line. At last, the sequence itself is written on the following lines.
+}
+ \item{"clustal"}{The CLUSTAL format (*.aln) is the format of the
+ ClustalW multialignment tool output. It can be described as follows.
+ The word CLUSTAL is on the first line of the file. The alignment
+ is displayed in blocks of a fixed length, each line in the block
+ corresponding to one sequence. Each line of each block starts with
+ the sequence name (maximum of 10 characters), followed by at least
+ one space character. The sequence is then displayed in upper or
+ lower cases, '-' denotes gaps. The residue number may be displayed
+ at the end of the first line of each block.
+}
+ \item{"msf"}{ MSF is the multiple sequence alignment format of the
+ GCG sequence analysis package. It begins with the line (all
+ uppercase) !!NA\_MULTIPLE\_ALIGNMENT 1.0 for nucleic acid sequences
+ or !!AA\_MULTIPLE\_ALIGNMENT 1.0 for amino acid sequences. Do
+ not edit or delete the file type if it is present (optional).
+ A description line which contains informative text describing what
+ is in the file. You can add this information to the top of the MSF
+ file using a text editor.(optional) A dividing line which contains
+ the number of bases or residues in the sequence, when the file was
+ created, and importantly, two dots (..) which act as a divider
+ between the descriptive information and the following sequence
+ information.(required) msf files contain some other information:
+ the Name/Weight, a Separating Line which must include two slashes
+ (//) to divide the name/weight information from the sequence
+ alignment.(required) and the multiple sequence alignment.
+}
+ \item{"phylip"}{ PHYLIP is a tree construction program. The format
+ is as follows: the number of sequences and their length (in characters)
+ is on the first line of the file. The alignment is displayed in an
+ interleaved or sequential format. The sequence names are limited
+ to 10 characters and may contain blanks.
+}
+ \item{"fasta"}{ Sequence in fasta format begins with a single-line
+ description (distinguished by a greater-than (>) symbol), followed
+ by sequence data on the next line.
+}
+}
+}
+\value{
+ An object of class \code{alignment} which is a list with the following components:
+ \item{nb}{ the number of aligned sequences }
+ \item{nam}{ a vector of strings containing the names of the aligned sequences }
+ \item{seq}{ a vector of strings containing the aligned sequences}
+ \item{com}{ a vector of strings containing the commentaries for each sequence or \code{NA} if there are no comments }
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{
+To read aligned sequences in NEXUS format, see the function
+\code{read.nexus} that was available in the \code{CompPairWise} package
+(not sure it is still maintained as of 09/09/09).
+The NEXUS format was mainly used by the non-GPL commercial PAUP
+software.
+
+Related functions: \code{\link{as.matrix.alignment}}, \code{\link{read.fasta}},
+\code{\link{write.fasta}}, \code{\link{reverse.align}}, \code{\link{dist.alignment}}.
+
+ }
+\examples{
+mase.res <- read.alignment(file = system.file("sequences/test.mase", package = "seqinr"),
+ format = "mase")
+clustal.res <- read.alignment(file = system.file("sequences/test.aln", package = "seqinr"),
+ format="clustal")
+phylip.res <- read.alignment(file = system.file("sequences/test.phylip", package = "seqinr"),
+ format = "phylip")
+msf.res <- read.alignment(file = system.file("sequences/test.msf", package = "seqinr"),
+ format = "msf")
+fasta.res <- read.alignment(file = system.file("sequences/Anouk.fasta", package = "seqinr"),
+ format = "fasta")
+
+#
+# Quality control routine sanity checks:
+#
+
+data(mase); stopifnot(identical(mase, mase.res))
+data(clustal); stopifnot(identical(clustal, clustal.res))
+data(phylip); stopifnot(identical(phylip, phylip.res))
+data(msf); stopifnot(identical(msf, msf.res))
+data(fasta); stopifnot(identical(fasta, fasta.res))
+
+}
+
diff --git a/man/read.fasta.Rd b/man/read.fasta.Rd
new file mode 100644
index 0000000..c21e790
--- /dev/null
+++ b/man/read.fasta.Rd
@@ -0,0 +1,177 @@
+\name{read.fasta}
+\alias{read.fasta}
+\alias{readfasta}
+\alias{FASTA}
+\title{ read FASTA formatted files }
+\description{
+ Read nucleic or amino-acid sequences from a file in FASTA format.
+}
+\usage{
+read.fasta(file = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
+ set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, strip.desc = FALSE,
+ bfa = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
+ endian = .Platform$endian, apply.mask = TRUE)
+}
+\arguments{
+ \item{file}{ The name of the file which the sequences in fasta format are to be
+ read from. If it does not contain an absolute or relative path, the file name is relative
+ to the current working directory, \code{\link{getwd}}. The default here is to
+ read the \code{ct.fasta.gz} file which is present in the \code{sequences} folder
+ of the seqinR package.}
+ \item{seqtype}{ the nature of the sequence: \code{DNA} or \code{AA}, defaulting
+ to \code{DNA} }
+ \item{as.string}{ if TRUE sequences are returned as a string instead of a
+ vector of single characters}
+ \item{forceDNAtolower}{ whether sequences with \code{seqtype == "DNA"} should be
+ returned as lower case letters }
+ \item{set.attributes}{ whether sequence attributes should be set}
+ \item{legacy.mode}{if TRUE lines starting with a semicolon ';' are ignored}
+ \item{seqonly}{if TRUE, only sequences are returned without attempt to modify
+ them or to get their names and annotations (execution time is divided approximately
+ by a factor 3)}
+ \item{strip.desc}{if TRUE the '>' at the beginning of the description lines is removed
+ in the annotations of the sequences}
+ \item{bfa}{logical. If TRUE the fasta file is in MAQ binary format (see details).
+ Only for DNA sequences.}
+ \item{sizeof.longlong}{the number of bytes in a C \code{long long} type.
+ Only relevant for \code{bfa = TRUE}. See \code{\link{.Machine}}}
+ \item{endian}{character string, \code{"big"} or \code{"little"}, giving the
+ endianness of the processor in use. Only relevant for \code{bfa = TRUE}.
+ See \code{\link{.Platform}}}
+ \item{apply.mask}{logical defaulting to \code{TRUE}. Only relevant for
+ \code{bfa = TRUE}. When this flag is \code{TRUE} the mask in the MAQ
+ binary format is used to replace non acgt characters in the sequence
+ by the n character. For pure acgt sequences (without gaps or ambiguous
+ bases) turning this to \code{FALSE} will save time.}
+}
+\details{
+ FASTA is a widely used format in biology, some FASTA files are distributed
+ with the seqinr package, see the examples section below.
+ Sequence in FASTA format begins with a single-line
+ description (distinguished by a greater-than '>' symbol), followed
+ by sequence data on the next lines. Lines starting with a semicolon ';'
+ are ignored, as in the original FASTA program (Pearson and Lipman 1988).
+ The sequence name is just after the '>' up to the next space ' ' character,
+ trailing information is ignored for the name but saved in the annotations.
+
+ There is no standard file extension name for a FASTA file. Commonly
+ found values are .fasta, .fas, .fa and .seq for generic FASTA files.
+ More specific file extension names are also used for fasta sequence
+ alignment (.fsa), fasta nucleic acid (.fna), fasta functional
+ nucleotide (.ffn), fasta amino acid (.faa), multiple protein
+ fasta (.mpfa), fasta RNA non-coding (.frn).
+
+ The MAQ fasta binary format was introduced in seqinR 1.1-7 and has not
+ been extensively tested. This format is used in the MAQ (Mapping and
+ Assembly with Qualities) software (\url{http://maq.sourceforge.net/}).
+ In this format the four nucleotides are coded with two bits and the
+ sequence is stored as a vector of C \code{unsigned long long}. There
+ is in addition a mask to locate non-acgt characters.
+}
+\note{
+The old argument \code{File} that was deprecated since seqinR >= 1.1-3 is
+no longer valid since seqinR >= 2.0-6. Just use \code{file} instead.
+}
+\value{
+ By default \code{read.fasta} returns a list of vectors of chars. Each element
+ is a sequence object of the class \code{SeqFastadna} or \code{SeqFastaAA}.
+}
+\references{
+
+ Pearson, W.R. and Lipman, D.J. (1988) Improved tools for biological
+ sequence comparison. \emph{Proceedings of the National Academy
+ of Sciences of the United States of America}, \bold{85}:2444-2448
+
+ According to MAQ's FAQ page \url{http://maq.sourceforge.net/faq.shtml}
+ last consulted 2016-06-07 the MAQ manuscript has not been published.
+
+ \code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{
+ \code{\link{write.fasta}} to write sequences in a FASTA file,
+ \code{\link{gb2fasta}} to convert a GenBank file into a FASTA file,
+ \code{\link{read.alignment}} to read aligned sequences,
+ \code{\link{reverse.align}} to get an alignment at the nucleic level from the
+ one at the amino-acid level }
+\examples{
+#
+# Simple sanity check with a small FASTA file:
+#
+ smallFastaFile <- system.file("sequences/smallAA.fasta", package = "seqinr")
+ mySmallProtein <- read.fasta(file = smallFastaFile, as.string = TRUE, seqtype = "AA")[[1]]
+ stopifnot(mySmallProtein == "SEQINRSEQINRSEQINRSEQINR*")
+#
+# Simple sanity check with the gzipped version of the same small FASTA file:
+#
+ smallFastaFile <- system.file("sequences/smallAA.fasta.gz", package = "seqinr")
+ mySmallProtein <- read.fasta(file = smallFastaFile, as.string = TRUE, seqtype = "AA")[[1]]
+ stopifnot(mySmallProtein == "SEQINRSEQINRSEQINRSEQINR*")
+#
+# Example of a DNA file in FASTA format:
+#
+ dnafile <- system.file("sequences/malM.fasta", package = "seqinr")
+#
+# Read with default arguments, looks like:
+#
+# $XYLEECOM.MALM
+# [1] "a" "t" "g" "a" "a" "a" "a" "t" "g" "a" "a" "t" "a" "a" "a" "a" "g" "t"
+# ...
+ read.fasta(file = dnafile)
+#
+# The same but do not turn the sequence into a vector of single characters, looks like:
+#
+# $XYLEECOM.MALM
+# [1] "atgaaaatgaataaaagtctcatcgtcctctgtttatcagcagggttactggcaagcgc
+# ...
+ read.fasta(file = dnafile, as.string = TRUE)
+#
+# The same but do not force lower case letters, looks like:
+#
+# $XYLEECOM.MALM
+# [1] "ATGAAAATGAATAAAAGTCTCATCGTCCTCTGTTTATCAGCAGGGTTACTGGCAAGC
+# ...
+ read.fasta(file = dnafile, as.string = TRUE, forceDNAtolower = FALSE)
+#
+# Example of a protein file in FASTA format:
+#
+ aafile <- system.file("sequences/seqAA.fasta", package = "seqinr")
+#
+# Read the protein sequence file, looks like:
+#
+# $A06852
+# [1] "M" "P" "R" "L" "F" "S" "Y" "L" "L" "G" "V" "W" "L" "L" "L" "S" "Q" "L"
+# ...
+ read.fasta(aafile, seqtype = "AA")
+#
+# The same, but as string and without attributes, looks like:
+#
+# $A06852
+# [1] "MPRLFSYLLGVWLLLSQLPREIPGQSTNDFIKACGRELVRLWVEICGSVSWGRTALSLEEP
+# QLETGPPAETMPSSITKDAEILKMMLEFVPNLPQELKATLSERQPSLRELQQSASKDSNLNFEEFK
+# KIILNRQNEAEDKSLLELKNLGLDKHSRKKRLFRMTLSEKCCQVGCIRKDIARLC*"
+#
+ read.fasta(aafile, seqtype = "AA", as.string = TRUE, set.attributes = FALSE)
+#
+# Example with a FASTA file that contains comment lines starting with
+# a semicolon character ';'
+#
+ legacyfile <- system.file("sequences/legacy.fasta", package = "seqinr")
+ legacyseq <- read.fasta(file = legacyfile, as.string = TRUE)
+ stopifnot( nchar(legacyseq) == 921 )
+#
+# Example of a MAQ binary fasta file produced with maq fasta2bfa ct.fasta ct.bfa
+# on a platform where .Platform$endian == "little" and .Machine$sizeof.longlong == 8
+#
+ fastafile <- system.file("sequences/ct.fasta.gz", package = "seqinr")
+ bfafile <- system.file("sequences/ct.bfa", package = "seqinr")
+
+ original <- read.fasta(fastafile, as.string = TRUE, set.att = FALSE)
+ bfavers <- read.fasta(bfafile, as.string = TRUE, set.att = FALSE, bfa = TRUE,
+ endian = "little", sizeof.longlong = 8)
+ if(!identical(original, bfavers)){
+ warning(paste("trouble reading bfa file on a platform with endian =",
+ .Platform$endian, "and sizeof.longlong =", .Machine$sizeof.longlong))
+ }
+}
diff --git a/man/readBins.Rd b/man/readBins.Rd
new file mode 100644
index 0000000..2a06b96
--- /dev/null
+++ b/man/readBins.Rd
@@ -0,0 +1,56 @@
+\name{readBins}
+\alias{readBins}
+\title{Import GenMapper Bins configuration file}
+\description{
+In a Bins configuration file there is a description for a given identification
+kit of the expected allele sizes for all the markers available in the kit.
+}
+\usage{
+readBins(file,
+ colnames = c("allele.name", "size.bp", "minus.bp", "plus.bp"))}
+\arguments{
+ \item{file}{The name of the Bins configuration file.}
+ \item{colnames}{The names to be used for the columns of the data.frames.}
+}
+\value{
+A list whose first element is the file header info and following elements
+are lists, one for each kit encountered in the file. For each kit we have
+a list of data.frames, one per marker.
+}
+\details{
+The expected allele sizes are typically plus or minus 0.5 bp.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{readPanels}}.}
+\examples{
+#
+# Check that we can read the 2 example files in the seqinR package:
+#
+path1 <- system.file("abif/AmpFLSTR_Bins_v1.txt", package = "seqinr")
+resbin1 <- readBins(path1)
+path2 <- system.file("abif/Promega_Bins_v1.txt", package = "seqinr")
+resbin2 <- readBins(path2)
+#
+# Show the kits described in resbin1:
+#
+names(resbin1)
+#
+# Show the markers in a given kit:
+#
+names(resbin1[["Identifiler_v1"]])
+#
+# Show alleles expected sizes for a given marker:
+#
+resbin1[["Identifiler_v1"]][["D8S1179"]]
+#
+# Simple quality check since seqinr 2.0-4 with a configuration file
+# containing trailing tabulations:
+#
+path3 <- system.file("abif/Prototype_PowerPlex_EP01_Bins.txt", package = "seqinr")
+resbin3 <- readBins(path3)
+ncols <- sapply(resbin3[[2]], ncol)
+stopifnot(all(ncols == 4))
+}
diff --git a/man/readPanels.Rd b/man/readPanels.Rd
new file mode 100644
index 0000000..f77710f
--- /dev/null
+++ b/man/readPanels.Rd
@@ -0,0 +1,59 @@
+\name{readPanels}
+\alias{readPanels}
+\title{Import GenMapper Panels configuration file}
+\description{
+In a Panel configuration file there is a description for a given identification
+kit of the marker names, their dye label color, expected size range,
+expected positive control genotypes, number of bases in core repeat,
+stutter percentages, and allele names.
+}
+\usage{
+readPanels(file,
+ colnames = c("marker", "dye.col", "min.bp", "max.bp", "exp.pcg", "repeat.bp",
+ "stutter.pc", "uknw", "allele names"))}
+\arguments{
+ \item{file}{The name of the Panel configuration file.}
+ \item{colnames}{The names to be used for the columns of the data.frames.}
+}
+\value{
+A list whose first element is the file header info and following elements
+are data.frames, one for each kit encountered in the file.
+}
+\details{
+Number of bases in core repeat is set to 9 for Amelogenin locus.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{readBins}}, \code{\link{plotPanels}}.}
+\examples{
+#
+# Check that we can read the 2 example files in the seqinR package:
+#
+path1 <- system.file("abif/AmpFLSTR_Panels_v1.txt", package = "seqinr")
+res1 <- readPanels(path1)
+path2 <- system.file("abif/Promega_Panels_v1.txt", package = "seqinr")
+res2 <- readPanels(path2)
+#
+# Show the kits described in res1:
+#
+names(res1)
+#
+# Show some data for a given kit:
+#
+res1[["Identifiler_v1"]][, 1:7]
+#
+# Plot a simple summary of two kits:
+#
+par(mfrow = c(2,1))
+plotPanels("Identifiler_v1", res1)
+plotPanels("PowerPlex_16_v1", res2)
+
+#
+# Simple quality check since seqinR 2.0-4 with a file containing
+# a non constant number of tabulations as separator:
+#
+path3 <- system.file("abif/Prototype_PowerPlex_EP01_Pa.txt", package = "seqinr")
+res3 <- readPanels(path3)
+}
diff --git a/man/readfirstrec.Rd b/man/readfirstrec.Rd
new file mode 100644
index 0000000..d05a562
--- /dev/null
+++ b/man/readfirstrec.Rd
@@ -0,0 +1,56 @@
+\name{readfirstrec}
+\alias{readfirstrec}
+\title{Low level function to get the record count of the specified ACNUC
+index file}
+\description{
+ Called without arguments, the list of available values for argument type is
+ returned.
+}
+\usage{
+readfirstrec(socket = autosocket(), type)
+}
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{type}{the ACNUC index file}
+}
+\details{
+Available index files are:
+
+ \describe{
+ \item{AUT}{AUTHOR one record for each author name (last name only, no initials)}
+ \item{BIB}{BIBLIO one record for each reference}
+ \item{ACC}{ACCESS one record for each accession number}
+ \item{SMJ}{SMJYT one record for each status, molecule, journal, year, type,
+ organelle, division, and db structure information}
+ \item{SUB}{SUBSEQ one record for each parent or sub-sequence}
+ \item{LOC}{LOCUS one record for each parent sequence}
+ \item{KEY}{KEYWORDS one record for each keyword}
+ \item{SPEC}{SPECIES one record for each taxon}
+ \item{SHRT}{SHORTL mostly, one record for each element of a short list}
+ \item{LNG}{LONGL one record for each group of SUBINLNG elements of a long list}
+ \item{EXT}{EXTRACT (for nucleotide databases only) one record for each exon of each subsequence}
+ \item{TXT}{TEXT one lrtxt-character record for each label of a species, keyword, or SMJYT}
+ }
+}
+\value{
+ The record count of ACNUC index file, or NA if missing (typically when
+ asking for type = EXT on a protein database).
+}
+\references{
+ See ACNUC physical structure at
+ \url{http://doua.prabi.fr/databases/acnuc/structure.html}.\cr
+
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{choosebank}} }
+\examples{
+\dontrun{
+# Need internet connection
+ choosebank("genbank")
+ allowedtype <- readfirstrec()
+ sapply(allowedtype, function(x) readfirstrec(type = x))
+ }
+}
+\keyword{utilities}
diff --git a/man/readsmj.Rd b/man/readsmj.Rd
new file mode 100644
index 0000000..5d8c872
--- /dev/null
+++ b/man/readsmj.Rd
@@ -0,0 +1,40 @@
+\name{readsmj}
+\alias{readsmj}
+\title{Low level function to read ACNUC SMJYT index files}
+\description{
+ Extract information from the SMJYT index file for status, molecule, journal,
+ year, type, organelle, division, and db structure information.
+}
+\usage{
+readsmj(socket = autosocket(), num = 2, nl = 10, recnum.add = FALSE, nature.add = TRUE,
+plong.add = FALSE, libel.add = FALSE, sname.add = FALSE, all.add = FALSE)
+}
+\arguments{
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{num}{rank number of first record.}
+ \item{nl}{number of records to read.}
+ \item{recnum.add}{to extract record numbers.}
+ \item{nature.add}{to extract as a factor with human understandable levels
+ the nature of the name. Unordered levels are: status, molecule, journal,
+ year, type, organelle, division and dbstrucinfo.}
+ \item{plong.add}{to extract the plong.}
+ \item{libel.add}{to extract the label of the name.}
+ \item{sname.add}{to extract the short version of the name, that is without
+ the first two characters.}
+ \item{all.add}{to extract all (all flags set to TRUE).}
+}
+
+\value{
+ A data.frame with requested columns.
+}
+\references{
+ See ACNUC physical structure at:
+ \url{http://doua.prabi.fr/databases/acnuc/structure.html}.\cr
+
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{choosebank}} to start a session and
+\code{\link{readfirstrec}} to get the total number of records.}
+\keyword{utilities}
diff --git a/man/rearranged.oriloc.Rd b/man/rearranged.oriloc.Rd
new file mode 100644
index 0000000..35d8537
--- /dev/null
+++ b/man/rearranged.oriloc.Rd
@@ -0,0 +1,97 @@
+\name{rearranged.oriloc}
+\alias{rearranged.oriloc}
+\title{Detection of replication-associated effects on base composition asymmetry in
+ prokaryotic chromosomes.}
+\description{
+ Detection of replication-associated effects on base composition asymmetry in
+ prokaryotic chromosomes.
+ }
+
+\usage{
+rearranged.oriloc(seq.fasta = system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ g2.coord = system.file("sequences/ct.predict", package = "seqinr"))
+}
+
+\arguments{
+\item{seq.fasta}{The path of the file containing a FASTA-format
+ sequence. Default value:
+ the FASTA sequence of the Chlamydia trachomatis chromosome. }
+\item{g2.coord}{The path of the file containing the coordinates of the
+ protein coding genes found on this chromosome. This file can be
+ obtained using the function \code{gbk2g2}. The format of the file is
+ similar to the output of the Glimmer2 program. The first column
+ contains the index or the name of the gene, the second one contains
+ the start position and the third column contains the end position. For
+ reverse transcribed genes, the start position is greater than the end position.}
+}
+\details{
+The purpose of this method is to decouple replication-related
+and coding sequence-related effects on base composition asymmetry. In
+order to do so, the analyzed chromosome is artificially rearranged to
+obtain a perfect gene orientation bias - all forward transcribed genes
+on the first half of the chromosome, and all reverse transcribed genes
+on the other half.
+This rearrangement conserves the relative order of genes within each of
+the two groups - both forward-encoded and reverse-encoded genes are
+placed on the rearranged chromosome in increasing order of their
+coordinates on the real chromosome.
+If the replication mechanism has a significant effect on base
+composition asymmetry, this should be seen as a change of slope in the
+nucleotide skews computed on the rearranged chromosome; the change of
+slope should take place at the origin or the terminus of replication.
+Use \code{extract.breakpoints} to detect the position of the changes in
+slope on the rearranged nucleotide skews.
+}
+
+\value{
+ A data.frame with six columns: \code{meancoord.rearr} contains the
+ gene index on the rearranged chromosome; \code{gcskew.rearr} contains
+ the normalized GC-skew ((G-C)/(G+C)) computed on the third codon positions of
+ protein coding genes, still on the rearranged chromosome; \code{atskew.rearr} contains
+ the normalized AT-skew ((A-T)/(A+T)) computed on the third codon positions of
+ protein coding genes; \code{strand.rearr} contains the transcription
+ strand of the gene (either "forward" or "reverse"); \code{order}
+ contains the permutation that was used to obtain a perfect gene
+ orientation bias; \code{meancoord.real} contains the mid-coordinate of
+ the genes on the real chromosome (before the rearrangement).
+}
+
+\references{
+ Necşulea, A. and Lobry, J.R. (2007) A New Method for Assessing the
+ Effect of Replication on DNA Base Composition Asymmetry.
+ \emph{Molecular Biology and Evolution}, \bold{24}:2169-2179.
+}
+
+\author{A. Necşulea}
+\seealso{ \code{\link{oriloc}}, \code{\link{draw.rearranged.oriloc}},
+ \code{\link{extract.breakpoints}} }
+
+\examples{
+
+### Example for Chlamydia trachomatis ####
+
+### Rearrange the chromosome and compute the nucleotide skews ###
+
+\dontrun{r.ori <- rearranged.oriloc(seq.fasta =
+ system.file("sequences/ct.fasta.gz", package = "seqinr"),
+ g2.coord = system.file("sequences/ct.predict", package = "seqinr"))}
+
+### Extract the breakpoints for the rearranged nucleotide skews ###
+
+
+
+\dontrun{breaks <- extract.breakpoints(r.ori, type = c("gcfw", "gcrev"),
+ nbreaks =c(2, 2), gridsize = 50, it.max = 100)}
+
+
+
+### Draw the rearranged nucleotide skews and place the position of the breakpoints ###
+### on the graphics ###
+
+\dontrun{draw.rearranged.oriloc(r.ori, breaks.gcfw = breaks$gcfw$breaks,
+ breaks.gcrev = breaks$gcrev$breaks)}
+
+
+}
+
+\keyword{utilities}
diff --git a/man/recstat.Rd b/man/recstat.Rd
new file mode 100755
index 0000000..aa084ba
--- /dev/null
+++ b/man/recstat.Rd
@@ -0,0 +1,62 @@
+\name{recstat}
+\alias{recstat}
+\title{Prediction of Coding DNA Sequences.}
+\description{This function aims at predicting the position of Coding DNA Sequences (CDS) through
+the use of a Correspondence Analysis (CA) computed on codon composition, this for the three
+reading frames of a DNA strand.
+}
+\usage{recstat(seq, sizewin = 90, shift = 30, seqname = "no name")}
+\arguments{
+ \item{seq}{a nucleic acid sequence as a vector of characters}
+ \item{sizewin}{an integer, multiple of 3, giving the length of the sliding window}
+ \item{shift}{an integer, multiple of 3, giving the length of the steps between two windows}
+ \item{seqname}{the name of the sequence}
+}
+\details{The method is built on the hypothesis that the codon composition of a CDS is biased
+while it is not the case outside these regions. In order to detect such bias, a CA on codon
+frequencies is computed on the six possible reading frames of a DNA sequence (three from the
+direct strand and three from the reverse strand). When there is a CDS in one of the reading
+frame, it is expected that the CA factor scores observed in this frame (for both rows and
+columns) will be significantly different from those in the two others.}
+\value{This function returns a list containing the following components:\cr
+ \item{seq}{a single DNA sequence as a vector of characters}
+ \item{sizewin}{length of the sliding window}
+ \item{shift}{length of the steps between windows}
+ \item{seqsize}{length of the sequence}
+ \item{seqname}{name of the sequence}
+ \item{vdep}{a vector containing the positions of windows starts}
+ \item{vind}{a vector containing the reading frame of each window}
+ \item{vstopd}{a vector of stop codons positions in direct strand}
+ \item{vstopr}{a vector of stop codons positions in reverse strand}
+ \item{vinitd}{a vector of start codons positions in direct strand}
+ \item{vinitr}{a vector of start codons positions in reverse strand}
+ \item{resd}{a matrix containing codons frequencies for all the windows in the three frames
+ of the direct strand}
+ \item{resr}{a matrix containing codons frequencies for all the windows in the three frames
+ of the reverse strand}
+ \item{resd.coa}{list of class \code{coa} and \code{dudi} containing the result of the
+ CA computed on the codons frequencies in the direct strand}
+ \item{resr.coa}{list of class \code{coa} and \code{dudi} containing the result of the
+ CA computed on the codons frequencies in the reverse strand}
+}
+\note{This method works only with DNA sequences long enough to obtain a sufficient number
+of windows. As the optimal windows length has been estimated to be 90 bp by Fichant and
+Gautier (1987), the minimal sequence length is around 500 bp. The method can be used on
+prokaryotic and eukaryotic sequences. Also, only the first four factors of the CA are kept.
+Indeed, most of the time, only the first factor is relevant in order to detect CDS.
+}
+\author{O. Clerc, G. Perrière}
+\references{The original paper describing recstat is:\cr
+
+Fichant, G., Gautier, C. (1987) Statistical method for predicting protein coding
+regions in nucleic acid sequences. \emph{Comput. Appl. Biosci.}, \bold{3}, 287--295.\cr
+\url{http://bioinformatics.oxfordjournals.org/content/3/4/287.abstract}\cr
+}
+\seealso{\code{\link{draw.recstat}}, \code{\link{test.li.recstat}}, \code{\link{test.co.recstat}}}
+\examples{
+ff <- system.file("sequences/ECOUNC.fsa", package = "seqinr")
+seq <- read.fasta(ff)
+rec <- recstat(seq[[1]], seqname = getName(seq))
+}
+\keyword{correspondence analysis}
+\keyword{sequence}
diff --git a/man/residuecount.Rd b/man/residuecount.Rd
new file mode 100644
index 0000000..fb421b7
--- /dev/null
+++ b/man/residuecount.Rd
@@ -0,0 +1,36 @@
+\name{residuecount}
+\alias{residuecount}
+\title{Total number of residues in an ACNUC list}
+\description{
+Computes the total number of residues (nucleotides or aminoacids) in
+all sequences of the list of specified rank.
+}
+\usage{
+residuecount(lrank, socket = autosocket())
+}
+\arguments{
+ \item{lrank}{the list rank on the ACNUC server}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+\value{
+A single numeric value corresponding to the total number of residues or
+NA in case of problem.
+}
+
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}}, \code{\link{glr}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "t=CDS", virtual = TRUE)
+ stopifnot(residuecount(glr("mylist")) == 1611439240)
+ stopifnot(is.na(residuecount(glr("unknowlist")))) # A warning is issued
+ }
+}
+\keyword{ utilities }
diff --git a/man/revaligntest.Rd b/man/revaligntest.Rd
new file mode 100644
index 0000000..c750b5b
--- /dev/null
+++ b/man/revaligntest.Rd
@@ -0,0 +1,22 @@
+\name{revaligntest}
+\alias{revaligntest}
+\docType{data}
+\title{ Three aligned nucleic acid sequences }
+\encoding{latin1}
+\description{
+This dataset is used as a sanity check in \code{\link{reverse.align}}.
+}
+\usage{
+data(revaligntest)
+}
+\format{
+ An object of class alignment with 3 sequences.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{reverse.align}} }
+\examples{
+data(revaligntest)
+}
+\keyword{ datasets }
diff --git a/man/reverse.align.Rd b/man/reverse.align.Rd
new file mode 100644
index 0000000..0164b87
--- /dev/null
+++ b/man/reverse.align.Rd
@@ -0,0 +1,111 @@
+\name{reverse.align}
+\alias{reverse.align}
+\title{ Reverse alignment - from protein sequence alignment to nucleic sequence alignment }
+\description{
+ This function produces an alignment of nucleic protein-coding sequences, using as a
+ guide the alignment of the corresponding protein sequences.
+}
+\usage{
+reverse.align(nucl.file, protaln.file, input.format = 'fasta', out.file,
+ output.format = 'fasta', align.prot = FALSE, numcode = 1,
+ clustal.path = NULL, forceDNAtolower = TRUE, forceAAtolower = FALSE)
+}
+\arguments{
+ \item{nucl.file}{ A character string specifying the name of the FASTA format file containing the nucleotide sequences. }
+ \item{protaln.file}{A character string specifying the name of the file containing the aligned
+ protein sequences. This argument must be provided if \code{align.prot} is
+ set to \code{FALSE}. }
+ \item{input.format}{ A character string specifying the format of the
+ protein alignment file : 'mase', 'clustal', 'phylip', 'fasta' or 'msf'. }
+ \item{out.file}{A character string specifying the name of the output file. }
+ \item{output.format}{ A character string specifying the format of the output file. Currently the only
+ implemented format is 'fasta'. }
+ \item{align.prot}{Boolean. If TRUE, the nucleic sequences are
+ translated and then the protein sequences are aligned with the ClustalW program. The path
+ of the ClustalW binary must also be given (\code{clustal.path}) }
+ \item{numcode}{The NCBI genetic code number for the translation of the
+ nucleic sequences. By default the standard genetic code is used.}
+ \item{clustal.path}{ The path of the ClustalW binary. This argument
+ only needs to be set if \code{align.prot} is TRUE. }
+ \item{forceDNAtolower}{logical passed to \code{\link{read.fasta}} for reading
+ the nucleic acid file.}
+ \item{forceAAtolower}{logical passed to \code{\link{read.alignment}} for reading
+ the aligned protein sequence file.}
+}
+\details{
+ This function produces an alignment of nucleic protein-coding sequences using as a
+ guide the alignment of the corresponding protein sequences. The file containing
+ the nucleic sequences is given in the compulsory argument 'nucl.file';
+ this file must be written in the FASTA format.
+
+ The alignment of the protein sequences can either be provided
+ directly, through the 'protaln.file' parameter, or reconstructed with
+ ClustalW, if the parameter 'align.prot' is set to TRUE. In the latter
+ case, the path of the ClustalW binary must be given in the
+ 'clustal.path' argument.
+
+ The protein and nucleic sequences must have the same name in the files
+ \code{nucl.file} and \code{protaln.file}.
+
+ The reverse-aligned nucleotide sequences are written to the file
+ specified in the compulsory 'out.file' argument. For now, the only
+ output format implemented is FASTA.
+
+ Warning: the 'align.prot=TRUE' option has only been tested on LINUX
+ operating systems. ClustalW must be installed on your system in order
+ for this to work.
+}
+\value{
+ NULL
+}
+\references{
+ \code{citation('seqinr')}
+}
+\author{A. Necşulea}
+\seealso{ \code{\link{read.alignment}}, \code{\link{read.fasta}}, \code{\link{write.fasta}}}
+
+\examples{
+
+#
+# Read example 'bordetella.fasta': a triplet of orthologous genes from
+# three bacterial species (Bordetella pertussis, B. parapertussis and
+# B. bronchiseptica):
+#
+
+nucl.file <- system.file('sequences/bordetella.fasta', package = 'seqinr')
+triplet <- read.fasta(nucl.file)
+
+#
+# For this example, 'bordetella.pep.aln' contains the aligned protein
+# sequences, in the Clustal format:
+#
+
+protaln.file <- system.file('sequences/bordetella.pep.aln', package = 'seqinr')
+triplet.pep<- read.alignment(protaln.file, format = 'clustal')
+
+#
+# Call reverse.align for this example:
+#
+
+reverse.align(nucl.file = nucl.file, protaln.file = protaln.file,
+ input.format = 'clustal', out.file = 'test.revalign')
+
+#
+# Simple sanity check against expected result:
+#
+
+res.new <- read.alignment("test.revalign", format = "fasta")
+data(revaligntest)
+stopifnot(identical(res.new, revaligntest))
+
+#
+# Alternatively, we can use ClustalW to align the translated nucleic
+# sequences. Here the ClustalW program is accessible simply by the
+# 'clustalw' name.
+#
+
+\dontrun{
+reverse.align(nucl.file = nucl.file, out.file = 'test.revalign.clustal',
+ align.prot = TRUE, clustal.path = 'clustalw')}
+}
+\keyword{ manip }
diff --git a/man/rot13.Rd b/man/rot13.Rd
new file mode 100644
index 0000000..d6a950d
--- /dev/null
+++ b/man/rot13.Rd
@@ -0,0 +1,34 @@
+\name{rot13}
+\alias{rot13}
+\title{Ergheaf gur EBG-13 pvcurevat bs n fgevat}
+\description{
+ rot13 applied to the above title returns the string
+ "Returns the ROT-13 ciphering of a string".
+}
+\usage{
+rot13(string)
+}
+\arguments{
+ \item{string}{a string of characters.}
+}
+\value{
+ a string of characters.
+}
+\author{J.R. Lobry}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{\code{\link{chartr}}
+}
+\examples{
+##
+## Simple ciphering of a string:
+##
+message <- "Hello, world!"
+rot13(message) # "Uryyb, jbeyq!"
+##
+## Routine sanity check:
+##
+stopifnot(identical(rot13(rot13(message)), message))
+}
+\keyword{ manip }
diff --git a/man/s2c.Rd b/man/s2c.Rd
new file mode 100755
index 0000000..c4b3ae6
--- /dev/null
+++ b/man/s2c.Rd
@@ -0,0 +1,27 @@
+\name{s2c}
+\alias{s2c}
+\title{ conversion of a string into a vector of chars }
+\description{
+ This is a simple utility function to convert a single string such
+ as \code{"BigBang"} into a vector of chars such as
+ \code{c("B", "i", "g", "B", "a", "n", "g")}.
+}
+\usage{
+s2c(string)
+}
+\arguments{
+ \item{string}{ a string of chars }
+}
+\value{
+ a vector of chars. If supplied argument is not a single string, a warning is
+ issued and NA returned.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{c2s}} }
+\examples{
+stopifnot(all(s2c("BigBang") == c("B", "i", "g", "B", "a", "n", "g")))
+}
+\keyword{utilities}
diff --git a/man/s2n.Rd b/man/s2n.Rd
new file mode 100644
index 0000000..5c832bb
--- /dev/null
+++ b/man/s2n.Rd
@@ -0,0 +1,65 @@
+\name{s2n}
+\alias{s2n}
+\title{ simple numerical encoding of a DNA sequence.
+ }
+\description{
+By default, if no \code{levels} argument is provided, this function will
+just code your DNA sequence in integer values following the lexical
+order \code{(a > c > g > t)}, that is 0 for "a", 1 for "c", 2 for "g", 3 for
+"t" and NA for ambiguous bases.
+}
+\usage{
+s2n(seq, levels = s2c("acgt"), base4 = TRUE, forceToLower = TRUE)
+}
+\arguments{
+ \item{seq}{ the sequence as a vector of single chars }
+ \item{levels}{ allowed char values, by default a, c, g and t }
+ \item{base4}{if TRUE the numerical encoding will start at 0, if
+FALSE at 1}
+ \item{forceToLower}{if TRUE the sequence is forced to lower case characters}
+}
+\value{
+ a vector of integers
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry }
+\note{
+The idea of starting numbering at 0 by default is that it enforces
+a kind of isomorphism between the paste operator on DNA chars and
+the + operator on integer coding for DNA chars. In this way, you can
+work either in the char set or in the integer set, depending
+on what is more convenient for your purpose, and then switch from one
+set to the other one as you like.
+}
+\seealso{ \code{\link{n2s}}, \code{\link{factor}}, \code{\link{unclass}} }
+\examples{
+##
+## Example of default behaviour:
+##
+urndna <- s2c("acgt")
+seq <- sample( urndna, 100, replace = TRUE ) ; seq
+s2n(seq)
+##
+## How to deal with RNA:
+##
+urnrna <- s2c("acgu")
+seq <- sample( urnrna, 100, replace = TRUE ) ; seq
+s2n(seq, levels = urnrna)
+##
+## what happens with unknown characters:
+##
+urnmess <- c(urndna,"n")
+seq <- sample( urnmess, 100, replace = TRUE ) ; seq
+s2n(seq)
+##
+## How to change the encoding for unknown characters:
+##
+tmp <- s2n(seq) ; tmp[is.na(tmp)] <- -1; tmp
+##
+## Simple sanity check:
+##
+stopifnot(all(s2n(s2c("acgt")) == 0:3))
+}
+\keyword{ utilities }
diff --git a/man/savelist.Rd b/man/savelist.Rd
new file mode 100644
index 0000000..bb38473
--- /dev/null
+++ b/man/savelist.Rd
@@ -0,0 +1,45 @@
+\name{savelist}
+\alias{savelist}
+\title{Save sequence names or accession numbers into a file}
+\description{
+This function retrieves all sequence names or all accession numbers from
+an ACNUC list and saves them into a file.
+}
+\usage{
+savelist(lrank, type = c("N", "A"),
+ filename = paste(gln(lrank), ifelse(type == "N", "mne", "acc"),
+ sep = "."),socket = autosocket(), warnme = TRUE)
+}
+\arguments{
+ \item{lrank}{the rank of the ACNUC list to consider.}
+ \item{type}{use "N" for sequence names (mnemonics) and "A" for accession numbers.
+ Default is "N".}
+ \item{filename}{a string of characters giving the name of the file to save results.}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+ \item{warnme}{if TRUE a message is issued on the console when complete.}
+}
+
+\value{
+none.
+}
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}}, \code{\link{glr}} to
+get a list rank from its name, \code{\link{clfcd}} for the inverse operation
+of \code{savelist}}
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "sp=felis catus et t=cds", virtual=TRUE)
+ savelist(glr("mylist"))
+ # 603 sequence mnemonics written into file: MYLIST.mne
+ savelist(glr("mylist"), type = "A")
+ # 603 sequence accession numbers written into file: MYLIST.acc
+ }
+}
+\keyword{ utilities }
diff --git a/man/seqinr-package.Rd b/man/seqinr-package.Rd
new file mode 100644
index 0000000..b582b8c
--- /dev/null
+++ b/man/seqinr-package.Rd
@@ -0,0 +1,20 @@
+\name{seqinr-package}
+\alias{seqinr-package}
+\alias{seqinr}
+\docType{package}
+\title{
+Biological Sequences Retrieval and Analysis
+}
+\description{
+Exploratory data analysis and data visualization for
+biological sequence (DNA and protein) data. Also includes
+utilities for sequence data management under the ACNUC system.
+}
+\author{Delphine Charif [aut], Olivier Clerc [ctb], Carolin Frank [ctb],
+ Jean R. Lobry [aut], Anamaria Necşulea [ctb], Leonor Palmeira [ctb],
+ Simon Penel [cre], Guy Perrière [ctb]}
+\references{
+citation('seqinr')
+}
+\keyword{ package }
+
diff --git a/man/setlistname.Rd b/man/setlistname.Rd
new file mode 100644
index 0000000..f5f5214
--- /dev/null
+++ b/man/setlistname.Rd
@@ -0,0 +1,45 @@
+\name{setlistname}
+\alias{setlistname}
+\title{Sets the name of an ACNUC list identified by its rank}
+\description{
+This is a low level function to set the name of a list from an ACNUC server.
+It should not be used directly by end users.
+}
+\usage{
+setlistname(lrank, name = "list1", socket = autosocket())
+}
+\arguments{
+ \item{lrank}{the list rank on the ACNUC server}
+ \item{name}{the name to use for this list}
+ \item{socket}{an object of class \code{sockconn} connecting to a remote ACNUC
+ database (default is a socket to the last opened database).}
+}
+\value{
+A single numeric value corresponding to:
+
+ \item{NA}{Empty answer from server.}
+ \item{0}{OK.}
+ \item{3}{if another list with that name already existed and was deleted.}
+ \item{4}{no list of rank \code{lrank} exists.}
+}
+
+\references{ \url{http://doua.prabi.fr/databases/acnuc.html}
+
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{choosebank}}, \code{\link{query}}, \code{\link{glr}} }
+\examples{
+\dontrun{
+ ### Need internet connection
+ choosebank("emblTP")
+ mylist <- query("mylist", "sp=felis catus et t=CDS", virtual = TRUE)
+ # Change list name on server:
+ setlistname(lrank = glr("mylist"), name = "feliscatus") # 0, OK.
+ glr("mylist") # 0, list doesn't exist any more.
+ glr("feliscatus") # 2, this list exists.
+ # Note the danger here: the object mylist is still present in the user workspace
+ # while the corresponding list was deleted from server.
+ }
+}
+\keyword{ utilities }
diff --git a/man/splitseq.Rd b/man/splitseq.Rd
new file mode 100644
index 0000000..9623d2f
--- /dev/null
+++ b/man/splitseq.Rd
@@ -0,0 +1,41 @@
+\name{splitseq}
+\alias{splitseq}
+\title{ split a sequence into sub-sequences }
+\description{
+ Split a sequence into sub-sequences of 3 (the default size) with no overlap between the sub-sequences.
+}
+\usage{
+splitseq(seq, frame = 0, word = 3)
+}
+\arguments{
+ \item{seq}{ a vector of chars }
+ \item{frame}{ an integer (0, 1, 2) giving the starting position to split the sequence }
+ \item{word}{ an integer giving the size of the sub-sequences }
+}
+\value{
+ This function returns a vector which contains the sub-sequences.
+}
+\references{
+\code{citation("seqinr")}
+\cr
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{split}} }
+\examples{
+cds <- s2c("aacgttgcaggtcgctcgctacgtagctactgttt")
+#
+# To obtain the codon sequence in frame 0:
+#
+stopifnot(identical(splitseq(cds),
+ c("aac", "gtt", "gca", "ggt", "cgc", "tcg", "cta", "cgt", "agc", "tac", "tgt")))
+#
+# Show the effect of frame and word with a ten char sequence:
+#
+(tenchar <- s2c("1234567890"))
+splitseq(tenchar, frame = 0)
+splitseq(tenchar, frame = 1)
+splitseq(tenchar, frame = 2)
+splitseq(tenchar, frame = 0, word = 2)
+splitseq(tenchar, frame = 0, word = 1)
+}
+\keyword{ manip }
diff --git a/man/stresc.Rd b/man/stresc.Rd
new file mode 100644
index 0000000..e418a34
--- /dev/null
+++ b/man/stresc.Rd
@@ -0,0 +1,27 @@
+\name{stresc}
+\alias{stresc}
+\title{Utility function to escape LaTeX special characters present in a string}
+\description{
+ This function returns a vector of strings in which LaTeX special characters are
+ escaped, this was useful in conjunction with xtable.
+}
+\usage{
+stresc(strings)
+}
+\arguments{
+ \item{strings}{A vector of strings to deal with.}
+}
+\value{
+ Returns a vector of strings with escaped characters within each string.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{s2c}} }
+\examples{
+ stresc("MISC_RNA")
+ stresc(c("BB_0001","BB_0002"))
+}
+
+
diff --git a/man/stutterabif.Rd b/man/stutterabif.Rd
new file mode 100644
index 0000000..611298c
--- /dev/null
+++ b/man/stutterabif.Rd
@@ -0,0 +1,83 @@
+\name{stutterabif}
+\Rdversion{1.1}
+\alias{stutterabif}
+\title{Stutter ratio estimation}
+\description{
+This function tries to estimate the stutter ratio, either in terms of peak height ratios or peak
+surface ratio.
+}
+\usage{
+stutterabif(abifdata, chanel, poswild, datapointbefore = 70,
+ datapointafter = 20, datapointsigma = 3.5,
+ chanel.names = c(1:4, 105), DATA = paste("DATA", chanel.names[chanel], sep = "."),
+ maxrfu = 1000, method = "monoH.FC", pms = 6, fig = FALSE)
+}
+\arguments{
+ \item{abifdata}{the result returned by \code{\link{read.abif}}}
+ \item{chanel}{the dye number}
+ \item{poswild}{the position in datapoint units of the allele at
+ the origin of the stutter product, typically obtained after a call to \code{\link{peakabif}}}
+ \item{datapointbefore}{how many datapoints before \code{poswild} to be included in analysis}
+ \item{datapointafter}{how many datapoints after \code{poswild} to be included in analysis}
+ \item{datapointsigma}{initial guess for the standard deviation of a peak}
+ \item{chanel.names}{number extensions used for the DATA}
+ \item{DATA}{names of the DATA components}
+ \item{maxrfu}{argument passed to \code{\link{baselineabif}}}
+ \item{method}{method to be used by \code{\link{splinefun}}}
+ \item{pms}{how many standard deviations (after gaussian fit) before and after the mean
+ peak values should be considered for spline function interpolation}
+ \item{fig}{should a summary plot be produced?}
+}
+
+\details{FIXME, See R code for now}
+
+\value{
+A list with the following components:
+\item{rh}{Stutter ratio computed as the height of the stutter divided by
+ the height of its corresponding allele}
+\item{rs}{Stutter ratio computed as the surface of the stutter divided by
+ the surface of its corresponding allele}
+\item{h1}{The height of the stutter with baseline at 0}
+\item{h2}{The height of the allele with baseline at 0}
+\item{s1}{The surface of the stutter}
+\item{s2}{The surface of the allele}
+\item{p}{A list of additional parameters that could be useful, see example}
+}
+
+\author{J.R. Lobry}
+
+\seealso{\code{\link{JLO}} for a dataset example,
+\code{\link{peakabif}} to get an estimate of peak location.}
+\examples{
+ #
+ # Load pre-defined dataset, same as what would be obtained with read.abif:
+ #
+
+data(JLO)
+
+ #
+ # Get peak locations in the blue channel:
+ #
+
+maxis <- peakabif(JLO, 1, npeak = 6, tmin = 3, fig = FALSE)$maxis
+
+ #
+ # Compute stutter ratio for first peak and ask for a figure:
+ #
+
+tmp <- stutterabif(JLO, 1, maxis[1], fig = TRUE)
+
+ #
+ # Show in addition the normal approximation used at the stutter peak:
+ #
+
+xx <- seq(tmp$p$mu1 - 6*tmp$p$sd1, tmp$p$mu1 + 6*tmp$p$sd1, le = 100)
+lines(xx, tmp$p$p1*dnorm(xx, tmp$p$mu1, tmp$p$sd1), col = "darkgreen")
+
+ #
+ # Show in addition the normal approximation used at allele peak:
+ #
+
+xx <- seq(tmp$p$mu2 - 6*tmp$p$sd2, tmp$p$mu2 + 6*tmp$p$sd2, le = 100)
+lines(xx, tmp$p$p2*dnorm(xx, tmp$p$mu2, tmp$p$sd2), col = "darkgreen")
+}
diff --git a/man/swap.Rd b/man/swap.Rd
new file mode 100644
index 0000000..525bfe2
--- /dev/null
+++ b/man/swap.Rd
@@ -0,0 +1,54 @@
+\name{swap}
+\alias{swap}
+\title{Exchange two R objects}
+\description{
+Exchange object \code{x} with object \code{y}.
+}
+\usage{
+swap(x, y)
+}
+\arguments{
+ \item{x}{an R object}
+ \item{y}{an R object}
+}
+\value{
+none.
+}
+\references{
+\code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{\code{\link{move}} }
+
+\examples{
+#
+# Example in a new empty environment:
+#
+local({
+ x <- 0:9
+ y <- 10:19
+ print(x)
+ print(y)
+ swap(x[1], y[2])
+ print(x)
+ print(y)
+})
+#
+# Sanity check with a bubble sort:
+#
+bubble.sort <- function(tab, n = length(tab)){
+ i <- 1
+ while(i < n){
+ if(tab[i + 1] < tab[i]){
+ swap(tab[i], tab[i+1])
+ i <- 1
+ } else {
+ i <- i+1
+ }
+ }
+ return(tab)
+}
+set.seed(1)
+x <- rnorm(10)
+stopifnot(identical(sort(x), bubble.sort(x)))
+}
diff --git a/man/syncodons.Rd b/man/syncodons.Rd
new file mode 100644
index 0000000..2de6c44
--- /dev/null
+++ b/man/syncodons.Rd
@@ -0,0 +1,113 @@
+\name{syncodons}
+\alias{syncodons}
+\title{Synonymous codons}
+\description{
+ Returns all synonymous codons for each codon given
+}
+\usage{syncodons(codons, numcode = 1)}
+\arguments{
+ \item{codons}{ A sequence of codons as generated by \code{splitseq} }
+ \item{numcode}{ The genetic code number as in \code{translate} }
+}
+\value{
+ a list containing, for each codon given (list tags), all synonymous
+ codons (including the original one)
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{L. Palmeira, J.R. Lobry}
+\seealso{ \code{\link{synsequence}} }
+\examples{
+#
+# The four synonymous codons for Glycine in the standard genetic code:
+#
+syncodons("ggg")
+#
+# With a sequence:
+#
+toycds <- s2c("tctgagcaaataaatcgg")
+syncodons(splitseq(toycds))
+#
+# Sanity check with the standard genetic code:
+#
+stdgencode <- structure(list(
+ ttt = c("ttc", "ttt"),
+ ttc = c("ttc", "ttt"),
+ tta = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ ttg = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ tct = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ tcc = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ tca = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ tcg = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ tat = c("tac", "tat"),
+ tac = c("tac", "tat"),
+ taa = c("taa", "tag", "tga"),
+ tag = c("taa", "tag", "tga"),
+ tgt = c("tgc", "tgt"),
+ tgc = c("tgc", "tgt"),
+ tga = c("taa", "tag", "tga"),
+ tgg = "tgg",
+ ctt = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ ctc = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ cta = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ ctg = c("cta", "ctc", "ctg", "ctt", "tta", "ttg"),
+ cct = c("cca", "ccc", "ccg", "cct"),
+ ccc = c("cca", "ccc", "ccg", "cct"),
+ cca = c("cca", "ccc", "ccg", "cct"),
+ ccg = c("cca", "ccc", "ccg", "cct"),
+ cat = c("cac", "cat"),
+ cac = c("cac", "cat"),
+ caa = c("caa", "cag"),
+ cag = c("caa", "cag"),
+ cgt = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ cgc = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ cga = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ cgg = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ att = c("ata", "atc", "att"),
+ atc = c("ata", "atc", "att"),
+ ata = c("ata", "atc", "att"),
+ atg = "atg",
+ act = c("aca", "acc", "acg", "act"),
+ acc = c("aca", "acc", "acg", "act"),
+ aca = c("aca", "acc", "acg", "act"),
+ acg = c("aca", "acc", "acg", "act"),
+ aat = c("aac", "aat"),
+ aac = c("aac", "aat"),
+ aaa = c("aaa", "aag"),
+ aag = c("aaa", "aag"),
+ agt = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ agc = c("agc", "agt", "tca", "tcc", "tcg", "tct"),
+ aga = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ agg = c("aga", "agg", "cga", "cgc", "cgg", "cgt"),
+ gtt = c("gta", "gtc", "gtg", "gtt"),
+ gtc = c("gta", "gtc", "gtg", "gtt"),
+ gta = c("gta", "gtc", "gtg", "gtt"),
+ gtg = c("gta", "gtc", "gtg", "gtt"),
+ gct = c("gca", "gcc", "gcg", "gct"),
+ gcc = c("gca", "gcc", "gcg", "gct"),
+ gca = c("gca", "gcc", "gcg", "gct"),
+ gcg = c("gca", "gcc", "gcg", "gct"),
+ gat = c("gac", "gat"),
+ gac = c("gac", "gat"),
+ gaa = c("gaa", "gag"),
+ gag = c("gaa", "gag"),
+ ggt = c("gga", "ggc", "ggg", "ggt"),
+ ggc = c("gga", "ggc", "ggg", "ggt"),
+ gga = c("gga", "ggc", "ggg", "ggt"),
+ ggg = c("gga", "ggc", "ggg", "ggt")),
+
+.Names = c("ttt", "ttc", "tta", "ttg", "tct", "tcc", "tca", "tcg", "tat", "tac",
+"taa", "tag", "tgt", "tgc", "tga", "tgg", "ctt", "ctc", "cta",
+"ctg", "cct", "ccc", "cca", "ccg", "cat", "cac", "caa", "cag",
+"cgt", "cgc", "cga", "cgg", "att", "atc", "ata", "atg", "act",
+"acc", "aca", "acg", "aat", "aac", "aaa", "aag", "agt", "agc",
+"aga", "agg", "gtt", "gtc", "gta", "gtg", "gct", "gcc", "gca",
+"gcg", "gat", "gac", "gaa", "gag", "ggt", "ggc", "gga", "ggg"))
+#
+# Now the check:
+#
+currentresult <- syncodons(words(alphabet = s2c("tcag")))
+stopifnot(identical(stdgencode, currentresult))
+}
+\keyword{ utilities }
diff --git a/man/synsequence.Rd b/man/synsequence.Rd
new file mode 100644
index 0000000..236f946
--- /dev/null
+++ b/man/synsequence.Rd
@@ -0,0 +1,29 @@
+\name{synsequence}
+\alias{synsequence}
+\title{Random synonymous coding sequence generation}
+\description{
+ Generates a random synonymous coding sequence, according to a certain
+ codon usage bias
+}
+\usage{synsequence(sequence, numcode = 1, ucoweight = NULL)}
+\arguments{
+ \item{sequence}{ A nucleic acids sequence }
+ \item{numcode}{ The genetic code number as in \code{translate} }
+ \item{ucoweight}{ A list of weights containing the desired codon usage
+ bias as generated by \code{ucoweight} }
+}
+\value{
+ a sequence translating to the same protein sequence as the original
+ one (cf. \code{translate}), but containing synonymous codons
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{L. Palmeira}
+\seealso{ \code{\link{ucoweight}} }
+\examples{
+ data(ec999)
+ sequence=ec999[1][[1]]
+ synsequence(sequence,1,ucoweight(sequence))
+}
+\keyword{ utilities }
diff --git a/man/tablecode.Rd b/man/tablecode.Rd
new file mode 100644
index 0000000..5145f34
--- /dev/null
+++ b/man/tablecode.Rd
@@ -0,0 +1,46 @@
+\name{tablecode}
+\alias{tablecode}
+\title{ to plot genetic code as in textbooks }
+\description{
+This function plots a genetic code table as in textbooks, that is
+following the order \code{T > C > A > G} so that synonymous codons
+are almost always in the same boxes.
+}
+\usage{
+tablecode(numcode = 1, urn.rna = s2c("TCAG"), dia = FALSE, latexfile = NULL,
+label = latexfile, size = "normalsize", caption = NULL,
+preaa = rep("", 64), postaa = rep("", 64),
+precodon = preaa, postcodon = postaa)
+}
+\arguments{
+ \item{numcode}{The genetic code number as in \code{translate} }
+ \item{urn.rna}{The letters to display codons, use s2c("UCAG")
+ if you want the code in terms of RNA sequence}
+ \item{latexfile}{The name of a LaTeX file if you want to redirect the output}
+ \item{label}{The label for the LaTeX table}
+ \item{size}{The LaTeX size of characters for the LaTeX table}
+ \item{preaa}{A string to insert before the amino-acid in the LaTeX table}
+ \item{postaa}{A string to insert after the amino-acid in the LaTeX table}
+ \item{precodon}{A string to insert before the codon in the LaTeX table}
+ \item{postcodon}{A string to insert after the codon in the LaTeX table}
+ \item{caption}{The caption of the LaTeX table}
+ \item{dia}{to produce a yellow/blue plot for slides}
+}
+\details{
+The codon order for \code{preaa}, \code{postaa}, \code{precodon}, and
+\code{postcodon} should be the same as in
+\code{paste(paste(rep(s2c("tcag"), each =16), s2c("tcag"), sep = ""), rep(s2c("tcag"), each = 4), sep = "")}
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{translate}}, \code{\link{syncodons}} }
+\examples{
+#
+# Show me the standard genetic code:
+#
+
+ tablecode()
+}
+\keyword{ utilities }
diff --git a/man/test.co.recstat.Rd b/man/test.co.recstat.Rd
new file mode 100755
index 0000000..9f8e5a6
--- /dev/null
+++ b/man/test.co.recstat.Rd
@@ -0,0 +1,48 @@
+\name{test.co.recstat}
+\alias{test.co.recstat}
+\title{Tests if regions located between Stop codons contain putative CDSs.}
+\description{This test uses columns (codons) factor scores computed by \code{recstat} in order
+to determine if the regions located between two Stop codons correspond to putative CDSs.}
+\usage{test.co.recstat(rec, fac = 1, length.min = 150, stop.max = 0.2, win.lim = 0.8,
+ direct = TRUE, level = 0.01)}
+\arguments{
+ \item{rec}{list of elements returned by \code{recstat} function.}
+ \item{fac}{axis of the CA to use for test (4 \eqn{\ge} \code{fac}
+ \eqn{\ge} 1).}
+ \item{length.min}{minimal length between two Stop codons.}
+ \item{stop.max}{threshold for Stop codons relative position in a window to determine if this
+ window can be used for test computation.}
+ \item{win.lim}{minimum proportion of windows inside a region showing a p-value below the
+ threshold for Kruskal-Wallis test.}
+ \item{direct}{a logical for the choice of direct or reverse strand.}
+ \item{level}{p-value threshold for Kruskal-Wallis test.}
+}
+\details{The test is computed for all windows located between two Stop codons separated by at least
+\code{length.min} nucleotides. For each window inside a region considered, a Kruskal-Wallis test
+is computed on the factor scores of the codons found in this window, this for the three possible
+reading frames. If a proportion of at least \code{win.lim} windows in the region reject the null
+hypothesis of means equality between the reading frames, then, there is a good probability that
+a CDS is located in the region.\cr
+
+Inside the first and the last windows of a region submitted to the test, the relative position of
+the two Stop codons is used to determine if those windows can be used in the analysis. If the
+first Stop is located within the \code{stop.max} fraction of the 5' end of the window, then this
+window is kept in the analysis. In the same way, if the second Stop is located within the
+\code{stop.max} fraction of the 3' end of the window, this window is also kept in the analysis.
+}
+\value{The result is returned as a list containing three matrices (one for each reading frame).
+All matrices have the same structure, with rows corresponding to the regions between
+two Stop codons. Columns \code{Start} and \code{End} give the location of starting and ending
+positions of the region; and \code{CDS} is a binary indicator equal to 1 if a putative CDS is
+predicted, and to 0 if not.
+}
+\author{O. Clerc, G. Perrière}
+\seealso{\code{\link{test.li.recstat}}}
+\examples{
+ff <- system.file("sequences/ECOUNC.fsa", package = "seqinr")
+seq <- read.fasta(ff)
+rec <- recstat(seq[[1]], seqname = getName(seq))
+test.co.recstat(rec)
+}
+\keyword{sequence}
+\keyword{correspondence analysis}
diff --git a/man/test.li.recstat.Rd b/man/test.li.recstat.Rd
new file mode 100755
index 0000000..d40f82c
--- /dev/null
+++ b/man/test.li.recstat.Rd
@@ -0,0 +1,49 @@
+\name{test.li.recstat}
+\alias{test.li.recstat}
+\title{Tests if regions located between Stop codons contain putative CDSs.}
+\description{This test uses rows (windows) factor scores computed by \code{recstat} in order to
+determine if the regions located between two Stop codons correspond to putative CDSs.}
+\usage{test.li.recstat(rec, fac = 1, length.min = 150, stop.max = 0.2,
+ direct = TRUE, level = 0.05)}
+\arguments{
+ \item{rec}{list of elements returned by \code{recstat} function.}
+ \item{fac}{axis of the CA to use for test (4 \eqn{\ge} \code{fac}
+ \eqn{\ge} 1).}
+ \item{length.min}{minimal length between two Stop codons.}
+ \item{stop.max}{threshold for Stop codons relative position in a window to determine if this
+ window can be used for test computation.}
+ \item{direct}{a logical for the choice of direct or reverse strand.}
+ \item{level}{p-value threshold for t-test.}
+}
+\details{The test is computed for all regions between two Stop codons separated by at least
+\code{length.min} nucleotides, this for the three possible reading frames of a DNA strand. For
+each region considered, two t-tests are computed for comparing the mean of the factor scores of
+the windows from the reading frame in which the region is located with the means of the factor
+scores from the corresponding windows in the two other reading frames. If both t-tests reject
+the null hypothesis of means equality, then there is a good probability that a CDS is located in
+the region.\cr
+
+Inside the first and the last windows of a region submitted to the test, the relative position of
+the two Stop codons is used to determine if those windows can be used in the analysis. If the
+first Stop is located within the \code{stop.max} fraction of the 5' end of the window, then this
+window is kept in the analysis. In the same way, if the second Stop is located within the
+\code{stop.max} fraction of the 3' end of the window, this window is also kept in the analysis.
+}
+\value{The result is returned as a list containing three matrices (one for each reading frame).
+All matrices have the same structure, with rows corresponding to the regions between two Stop
+codons. Columns \code{Start} and \code{End} give the location of starting and ending positions
+of the region; \code{Mean i} gives the mean of the factor scores for the windows located in the
+region, this for reading frame \code{i}; \code{t(i,j)} gives the p-value of the t-test computed
+between the means from reading frames \code{i} and \code{j}; and \code{CDS} is a binary
+indicator equal to 1 if a putative CDS is predicted, and to 0 if not.
+}
+\author{O. Clerc, G. Perrière}
+\seealso{\code{\link{test.co.recstat}}}
+\examples{
+ff <- system.file("sequences/ECOUNC.fsa", package = "seqinr")
+seq <- read.fasta(ff)
+rec <- recstat(seq[[1]], seqname = getName(seq))
+test.li.recstat(rec)
+}
+\keyword{sequence}
+\keyword{correspondence analysis}
diff --git a/man/toyaa.Rd b/man/toyaa.Rd
new file mode 100644
index 0000000..6f5065c
--- /dev/null
+++ b/man/toyaa.Rd
@@ -0,0 +1,29 @@
+\name{toyaa}
+\alias{toyaa}
+\docType{data}
+\title{A toy example of amino-acid counts in three proteins}
+\encoding{latin1}
+\description{
+This is a toy data set to illustrate the importance of metric choice.
+}
+\usage{data(toyaa)}
+\format{
+ A data frame with 3 observations on the following 3 variables:
+ \describe{
+ \item{Ala}{Alanine counts}
+ \item{Val}{Valine counts}
+ \item{Cys}{Cysteine counts}
+ }
+}
+\source{
+This toy example was inspired by Gautier, C:
+Analyses statistiques et évolution des séquences d'acides nucléiques.
+PhD thesis (1987), Université Claude Bernard - Lyon I.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\examples{
+data(toyaa)
+}
+\keyword{datasets}
diff --git a/man/toycodon.Rd b/man/toycodon.Rd
new file mode 100644
index 0000000..8b34b2e
--- /dev/null
+++ b/man/toycodon.Rd
@@ -0,0 +1,20 @@
+\name{toycodon}
+\alias{toycodon}
+\docType{data}
+\title{A toy example of codon counts in three coding sequences}
+\description{
+This is a toy data set to illustrate synonymous and non-synonymous codon usage analyses.
+}
+\usage{data(toycodon)}
+\format{
+ A data frame with 3 observations (coding sequences) for 10 codons.}
+\source{
+Created for release 1.0-4 of seqinr's vignette.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\examples{
+data(toycodon)
+}
+\keyword{datasets}
diff --git a/man/translate.Rd b/man/translate.Rd
new file mode 100644
index 0000000..d783022
--- /dev/null
+++ b/man/translate.Rd
@@ -0,0 +1,128 @@
+\name{translate}
+\alias{translate}
+\title{ Translate nucleic acid sequences into proteins }
+\description{
+ This function translates nucleic acid sequences into the corresponding
+ peptide sequence. It can translate in any of the 3 forward or three
+ reverse sense frames. In the case of reverse sense, the reverse-complement
+ of the sequence is taken. It can translate using the standard (universal)
+ genetic code and also with non-standard codes. Ambiguous bases can also
+ be handled.
+}
+\usage{
+translate(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", ambiguous = FALSE)
+}
+\arguments{
+ \item{seq}{ the sequence to translate as a vector of single characters in lower case letters. }
+ \item{frame}{ Frame(s) (0,1,2) to translate. By default the frame \code{0} is used. }
+ \item{sens}{ Sense to translate: \code{F} for forward sense and \code{R} for reverse sense. }
+ \item{numcode}{ The ncbi genetic code number for translation. By default the standard genetic code is used. }
+ \item{NAstring}{ How to translate amino-acids when there are ambiguous bases in codons. }
+ \item{ambiguous}{ If TRUE, ambiguous bases are taken into account so that for instance
+ GGN is translated to Gly in the standard genetic code. }
+}
+\details{
+ The following genetic codes are described here. The number preceding each code
+ corresponds to \code{numcode}.
+\describe{
+ \item{1}{ standard }
+ \item{2}{ vertebrate.mitochondrial }
+ \item{3}{ yeast.mitochondrial }
+ \item{4}{ protozoan.mitochondrial+mycoplasma }
+ \item{5}{ invertebrate.mitochondrial }
+ \item{6}{ ciliate+dasycladaceal }
+ \item{9}{ echinoderm+flatworm.mitochondrial }
+ \item{10}{ euplotid }
+ \item{11}{ bacterial+plantplastid }
+ \item{12}{ alternativeyeast }
+ \item{13}{ ascidian.mitochondrial }
+ \item{14}{ alternativeflatworm.mitochondrial }
+ \item{15}{ blepharism }
+ \item{16}{ chlorophycean.mitochondrial }
+ \item{21}{ trematode.mitochondrial }
+ \item{22}{ scenedesmus.mitochondrial }
+ \item{23}{ thraustochytrium.mitochondria }
+ \item{24}{Pterobranchia.mitochondrial}
+ \item{25}{CandidateDivision.SR1+Gracilibacteria}
+ \item{26}{Pachysolen.tannophilus}
+}
+}
+\value{
+ \code{translate} returns a vector of single characters containing the peptide sequence in
+ the standard one-letter IUPAC code. Termination (STOP) codons are translated by
+ the character '*'.
+}
+\references{
+The genetic codes have been taken from the ncbi taxonomy database:
+\url{https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi}.
+Last update October 05, 2000.\cr
+The IUPAC one-letter code for aminoacids is described at:
+\url{http://www.chem.qmul.ac.uk/iupac/AminoAcid/}
+
+\code{citation("seqinr")}
+}
+\author{D. Charif, J.R. Lobry}
+\seealso{
+Use \code{\link{tolower}} to change upper case letters into lower case letters.
+For coding sequences obtained from an ACNUC server with \code{\link{query}} it's
+better to use the function \code{\link{getTrans}} so that the relevant genetic
+code and the relevant frame are automatically used.
+The genetic codes are given in the object \code{\link{SEQINR.UTIL}}, a more
+human readable form is given by the function \code{\link{tablecode}}.
+Use \code{\link{aaa}} to get the three-letter code for amino-acids.}
+\examples{
+##
+## Toy CDS example invented by Leonor Palmeira:
+##
+toycds <- s2c("tctgagcaaataaatcgg")
+translate(seq = toycds) # should be c("S", "E", "Q", "I", "N", "R")
+##
+## Toy CDS example with ambiguous bases:
+##
+toycds2 <- s2c("tcngarcarathaaycgn")
+translate(toycds2) # should be c("X", "X", "X", "X", "X", "X")
+translate(toycds2, ambiguous = TRUE) # should be c("S", "E", "Q", "I", "N", "R")
+translate(toycds2, ambiguous = TRUE, numcode = 2) # should be c("S", "E", "Q", "X", "N", "R")
+##
+## Real CDS example:
+##
+realcds <- read.fasta(file = system.file("sequences/malM.fasta", package ="seqinr"))[[1]]
+translate(seq = realcds)
+# Biologically correct, only one stop codon at the end
+translate(seq = realcds, frame = 3, sens = "R", numcode = 6)
+# Biologically meaningless, note the in-frame stop codons
+
+\dontrun{
+## Need internet connection.
+## Translation of the following EMBL entry:
+##
+## FT CDS join(complement(153944..154157),complement(153727..153866),
+## FT complement(152185..153037),138523..138735,138795..138955)
+## FT /codon_start=1
+## FT /db_xref="FLYBASE:FBgn0002781"
+## FT /db_xref="GOA:Q86B86"
+## FT /db_xref="TrEMBL:Q86B86"
+## FT /note="mod(mdg4) gene product from transcript CG32491-RZ;
+## FT trans splicing"
+## FT /gene="mod(mdg4)"
+## FT /product="CG32491-PZ"
+## FT /locus_tag="CG32491"
+## FT /protein_id="AAO41581.1"
+## FT /translation="MADDEQFSLCWNNFNTNLSAGFHESLCRGDLVDVSLAAEGQIVKA
+## FT HRLVLSVCSPFFRKMFTQMPSNTHAIVFLNNVSHSALKDLIQFMYCGEVNVKQDALPAF
+## FT ISTAESLQIKGLTDNDPAPQPPQESSPPPAAPHVQQQQIPAQRVQRQQPRASARYKIET
+## FT VDDGLGDEKQSTTQIVIQTTAAPQATIVQQQQPQQAAQQIQSQQLQTGTTTTATLVSTN
+## FT KRSAQRSSLTPASSSAGVKRSKTSTSANVMDPLDSTTETGATTTAQLVPQQITVQTSVV
+## FT SAAEAKLHQQSPQQVRQEEAEYIDLPMELPTKSEPDYSEDHGDAAGDAEGTYVEDDTYG
+## FT DMRYDDSYFTENEDAGNQTAANTSGGGVTATTSKAVVKQQSQNYSESSFVDTSGDQGNT
+## FT EAQVTQHVRNCGPQMFLISRKGGTLLTINNFVYRSNLKFFGKSNNILYWECVQNRSVKC
+## FT RSRLKTIGDDLYVTNDVHNHMGDNKRIEAAKAAGMLIHKKLSSLTAADKIQGSWKMDTE
+## FT GNPDHLPKM"
+choosebank("emblTP")
+trans <- query("trans", "N=AE003734.PE35")
+getTrans(trans$req[[1]])
+## Complex transsplicing operations, the correct frame and the correct
+## genetic code are automatically used for translation into protein.
+}
+}
+\keyword{ manip }
diff --git a/man/trimSpace.Rd b/man/trimSpace.Rd
new file mode 100644
index 0000000..b4ab56d
--- /dev/null
+++ b/man/trimSpace.Rd
@@ -0,0 +1,74 @@
+\name{trimSpace}
+\alias{trimSpace}
+\title{Trim leading and/or trailing spaces in strings}
+\description{
+ This function removes from a character vector the longest successive run
+ of space characters starting at the beginning of the strings (leading space),
+ or the longest successive run of space characters at the end of the strings
+ (trailing space), or both (and this is the default behaviour).
+}
+\usage{
+trimSpace(x, leading = TRUE, trailing = TRUE, space = "[:space:]")
+}
+\arguments{
+ \item{x}{a character vector}
+ \item{leading}{logical defaulting to \code{TRUE}: should leading spaces be trimmed off?}
+ \item{trailing}{logical defaulting to \code{TRUE}: should trailing spaces be trimmed off?}
+ \item{space}{an extended regular expression defining space characters}
+}
+\details{
+ The default definition of space characters is broad: in addition to the
+ usual space, other characters such as the tabulation and newline characters are
+ considered as space characters. See the extended regular expression documentation for a complete list.
+}
+\value{
+ a character vector with the same length as x.
+}
+\references{
+ \code{citation("seqinr")}.
+}
+\seealso{Extended regular expressions are described in \link{regular expression}
+ (aka \code{\link{regexp}}).
+}
+\author{J.R. Lobry}
+\examples{
+ #
+ # Simple use:
+ #
+stopifnot( trimSpace(" seqinR ") == "seqinR" )
+
+ #
+ # Basic use, remove space at both ends:
+ #
+testspace <- c(" with leading space", "with trailing space ", " with both ")
+stopifnot(all( trimSpace(testspace) == c("with leading space",
+ "with trailing space",
+ "with both")))
+
+ #
+ # Remove only leading space:
+ #
+stopifnot(all( trimSpace(testspace, trailing = FALSE) == c("with leading space",
+ "with trailing space ",
+ "with both ")))
+
+ #
+ # Remove only trailing space:
+ #
+stopifnot(all( trimSpace(testspace, leading = FALSE) == c(" with leading space",
+ "with trailing space",
+ " with both")))
+
+ #
+ # This should do nothing:
+ #
+stopifnot(all( trimSpace(testspace, leading = FALSE, trailing = FALSE) == testspace))
+
+ #
+ # How to use alternative space characters:
+ #
+allspaces <- "\t\n\f\r seqinR \t\n\f\r"
+stopifnot(trimSpace(allspaces) == "seqinR")
+stopifnot(trimSpace(allspaces, space = "\t\n") == "\f\r seqinR \t\n\f\r")
+}
+\keyword{manip}
diff --git a/man/uco.Rd b/man/uco.Rd
new file mode 100644
index 0000000..6761339
--- /dev/null
+++ b/man/uco.Rd
@@ -0,0 +1,124 @@
+\name{uco}
+\alias{uco}
+\alias{rscu}
+\title{ Codon usage indices }
+\description{
+ \code{uco} calculates some codon usage indices: the codon counts \code{eff}, the relative frequencies \code{freq} or the Relative Synonymous Codon Usage \code{rscu}.
+}
+\usage{
+uco(seq, frame = 0, index = c("eff", "freq", "rscu"), as.data.frame = FALSE,
+NA.rscu = NA)
+}
+\arguments{
+ \item{seq}{ a coding sequence as a vector of chars }
+ \item{frame}{ an integer (0, 1, 2) giving the frame of the coding sequence }
+ \item{index}{ codon usage index choice, partial matching is allowed.
+ \code{eff} for codon counts,
+ \code{freq} for codon relative frequencies,
+ and \code{rscu} the RSCU index}
+ \item{as.data.frame}{ logical. If \code{TRUE}: all indices are returned into a data frame.}
+ \item{NA.rscu}{ when an amino-acid is missing, RSCU values are no longer defined and are reported
+ as missing values (\code{NA}). You can force them to another value (typically 0 or
+ 1) with this argument.}
+}
+\details{
+ Codons with ambiguous bases are ignored.\cr
+
+ RSCU is a simple measure of non-uniform usage of synonymous codons in a coding sequence
+ (Sharp \emph{et al.} 1986).
+ RSCU values are the number of times a particular codon is observed, relative to the number
+ of times that the codon would be observed for a uniform synonymous codon usage (i.e. all the
+ codons for a given amino-acid have the same probability).
+ In the absence of any codon usage bias, the RSCU values would be 1.00 (this is the case
+ for sequence \code{cds} in the example below). A codon that is used
+ less frequently than expected will have an RSCU value of less than 1.00 and vice versa for a codon
+ that is used more frequently than expected.\cr
+
+ Do not use correspondence analysis on RSCU tables as this is a source of artifacts
+ (Perrière and Thioulouse 2002, Suzuki \emph{et al.} 2008). Within-aminoacid correspondence analysis is a
+ simple way to study synonymous codon usage (Charif \emph{et al.} 2005). For an introduction
+ to correspondence analysis and within-aminoacid correspondence analysis see the
+ chapter titled \emph{Multivariate analyses} in the seqinR manual that ships with the
+ seqinR package in the \bold{doc} folder. You can also use internal correspondence
+ analysis if you want to analyze simultaneously a row-block structure such as the
+ within and between species variability (Lobry and Chessel 2003).\cr
+
+ If \code{as.data.frame} is FALSE, \code{uco} returns one of these:
+ \describe{
+ \item{ eff }{ a table of codon counts }
+ \item{ freq }{ a table of codon relative frequencies }
+ \item{ rscu }{ a numeric vector of relative synonymous codon usage values}
+ }
+ If \code{as.data.frame} is TRUE, \code{uco} returns a data frame with five columns:
+ \describe{
+ \item{ aa }{ a vector containing the name of amino-acid }
+ \item{ codon }{ a vector containing the corresponding codon }
+ \item{ eff }{ a numeric vector of codon counts }
+ \item{ freq }{ a numeric vector of codon relative frequencies }
+ \item{ rscu }{ a numeric vector of RSCU index }
+ }
+}
+\value{
+ If \code{as.data.frame} is FALSE, the default, a table for \code{eff} and \code{freq} and
+ a numeric vector for \code{rscu}. If \code{as.data.frame} is TRUE,
+ a data frame with all indices is returned.
+}
+\references{
+\code{citation("seqinr")} \cr
+
+Sharp, P.M., Tuohy, T.M.F., Mosurski, K.R. (1986) Codon usage in yeast: cluster
+analysis clearly differentiates highly and lowly expressed genes.
+\emph{Nucl. Acids. Res.}, \bold{14}:5125-5143.\cr
+
+Perrière, G., Thioulouse, J. (2002) Use and misuse of correspondence analysis in
+codon usage studies. \emph{Nucl. Acids. Res.}, \bold{30}:4548-4555.\cr
+
+Lobry, J.R., Chessel, D. (2003) Internal correspondence analysis of codon and
+amino-acid usage in thermophilic bacteria.
+\emph{Journal of Applied Genetics}, \bold{44}:235-261. \url{http://jag.igr.poznan.pl/2003-Volume-44/2/pdf/2003_Volume_44_2-235-261.pdf}.\cr
+
+Charif, D., Thioulouse, J., Lobry, J.R., Perrière, G. (2005) Online
+Synonymous Codon Usage Analyses with the ade4 and seqinR packages.
+\emph{Bioinformatics}, \bold{21}:545-547. \url{https://pbil.univ-lyon1.fr/members/lobry/repro/bioinfo04/}.\cr
+
+Suzuki, H., Brown, C.J., Forney, L.J., Top, E. (2008)
+Comparison of Correspondence Analysis Methods for Synonymous Codon Usage in Bacteria.
+\emph{DNA Research}, \bold{15}:357-365. \url{http://dnaresearch.oxfordjournals.org/cgi/reprint/15/6/357}.
+
+}
+\author{D. Charif, J.R. Lobry, G. Perrière}
+\examples{
+
+## Show all possible codons:
+words()
+
+## Make a coding sequence from this:
+(cds <- s2c(paste(words(), collapse = "")))
+
+## Get codon counts:
+uco(cds, index = "eff")
+
+## Get codon relative frequencies:
+uco(cds, index = "freq")
+
+## Get RSCU values:
+uco(cds, index = "rscu")
+
+## Show what happens with ambiguous bases:
+uco(s2c("aaannnttt"))
+
+## Use a real coding sequence:
+rcds <- read.fasta(file = system.file("sequences/malM.fasta", package = "seqinr"))[[1]]
+uco( rcds, index = "freq")
+uco( rcds, index = "eff")
+uco( rcds, index = "rscu")
+uco( rcds, as.data.frame = TRUE)
+
+## Show what happens with RSCU when an amino-acid is missing:
+ecolicgpe5 <- read.fasta(file = system.file("sequences/ecolicgpe5.fasta",package="seqinr"))[[1]]
+uco(ecolicgpe5, index = "rscu")
+
+## Force NA to zero:
+uco(ecolicgpe5, index = "rscu", NA.rscu = 0)
+}
+\keyword{ manip }
diff --git a/man/ucoweight.Rd b/man/ucoweight.Rd
new file mode 100644
index 0000000..1fd66c6
--- /dev/null
+++ b/man/ucoweight.Rd
@@ -0,0 +1,28 @@
+\name{ucoweight}
+\alias{ucoweight}
+\title{Weight of each synonymous codon}
+\description{
+ Returns a list containing, for each of the 20 amino acids + STOP
+ codon, the codon usage bias of each of the synonymous codon according
+ to a given codon sequence.
+}
+\usage{ucoweight(sequence, numcode = 1)}
+\arguments{
+ \item{sequence}{ A nucleic acids sequence }
+ \item{numcode}{ The genetic code number as in \code{translate} }
+}
+\value{
+ a list containing, for each of the 20 amino acids and STOP
+ codon (list tags), the weight of each synonymous codon (including the
+ original one).
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{L. Palmeira}
+\seealso{ \code{\link{synsequence}} }
+\examples{
+ data(ec999)
+ ucoweight(ec999[1][[1]])
+}
+\keyword{ utilities }
diff --git a/man/waterabs.Rd b/man/waterabs.Rd
new file mode 100644
index 0000000..e6d94b4
--- /dev/null
+++ b/man/waterabs.Rd
@@ -0,0 +1,95 @@
+\name{waterabs}
+\alias{waterabs}
+\docType{data}
+\title{Light absorption by the water column}
+\description{
+The absorption of light by water is highly dependent on the wavelength, this
+dataset gives the absorption coefficients from 200 to 700 nm.
+}
+\usage{data(waterabs)}
+\format{
+ A data.frame with 2 columns:
+ \describe{
+ \item{lambda}{wavelength in nm}
+ \item{abs}{absorption coefficient in 1/cm}
+ }
+}
+
+\source{
+Data were compiled by Palmeira (2007) from the cited references.
+
+The example section allows to reproduce the left part of figure
+2.7 from Palmeira (2007):
+
+\if{html}{\figure{waterabs.jpg}{options: width=400}}
+\if{latex}{\figure{waterabs.jpg}{options: width=12cm}}
+
+}
+\references{
+Palmeira, L. (2007) \emph{Analyse et modélisation des dépendances entre
+ sites voisins dans l'évolution des séquences d'ADN}, PhD thesis,
+Université Claude Bernard - Lyon I.\cr
+
+Litjens R. A., Quickenden T. I. and Freeman C. G. (1999). Visible and
+near-ultraviolet absorption spectrum of liquid water. \emph{Applied Optics},
+\bold{38}:1216-1223.\cr
+
+Quickenden T. I. & Irvin J. A. (1980). The ultraviolet absorption spectrum
+of liquid water. \emph{The Journal of Chemical Physics}, \bold{72}:4416-4428.\cr
+
+\code{citation("seqinr")}
+}
+\examples{
+data(waterabs)
+
+d <- 100*seq(from = 0, to = 150, by = 1) # depth in cm
+lambda <- waterabs$lambda # wavelength in nm
+abs <- waterabs$absorption # absorption coefficient cm-1
+#
+# Smooth signal with cubic splines
+#
+ tmp <- spline(lambda, abs, n = 255)
+ lambda <- tmp$x
+ abs <- tmp$y
+
+ zun <- sapply(abs,function(x) 10^(-x*d))
+ z <- sapply(nrow(zun):1, function(x) zun[x,])
+#
+# Set up world coordinates:
+#
+ plot.new()
+ plot.window(xlim = range(lambda), ylim = range(d), xaxs = "i", yaxs = "i")
+#
+# Annotate:
+#
+ title(ylab = 'Depth under water surface (m)', xlab = "Wavelength (nm)",
+ main = "Light absorption by the water column")
+ axis(2 , at = seq(0, 15000, l = 7),
+ labels = rev(c("0","25","50","75","100","125","150")), las = 1)
+ axis(1,at=(3:6)*100,labels= TRUE)
+#
+# Show me rainbow colors:
+#
+ alpha <- 1
+ coul=c(rep(rgb(1,1,1, alpha = alpha), 181),
+ rev(hsv(h=seq(0,5/6,l=320),alpha = alpha)))
+ rect(seq(200,699), 0, seq(201,700), 15000 , col = coul, border = coul)
+#
+# Grey scale:
+#
+ ngris <- 5
+ image(x = lambda, y = d, z = z, col = rgb(1:ngris, 1:ngris, 1:ngris, alpha = 0.7*(ngris:1),
+ max = ngris),
+ axes = F, add = TRUE,
+ breaks = seq(from = min(z), to = max(z), length = ngris + 1))
+
+#
+# Contour lines:
+#
+ contour(x = lambda, y = d, z = z, add = TRUE, drawlabels = TRUE,labcex= 0.75,
+ col='black',
+ levels = seq(from = min(z), to = max(z), length = ngris + 1))
+ box()
+
+}
+\keyword{datasets}
diff --git a/man/where.is.this.acc.Rd b/man/where.is.this.acc.Rd
new file mode 100644
index 0000000..44e249d
--- /dev/null
+++ b/man/where.is.this.acc.Rd
@@ -0,0 +1,31 @@
+\name{where.is.this.acc}
+\alias{where.is.this.acc}
+\title{Scans databases for a given sequence accession number}
+\description{
+This function loops over all available ACNUC databases to look for
+a given sequence accession number. This is useful when you have
+a sequence accession number and you don't know in which database
+it is present.
+}
+\usage{
+where.is.this.acc(acc, stopAtFirst = TRUE, ...)
+}
+\arguments{
+ \item{acc}{An accession number as a string of characters such as \code{"NC_001416"}.}
+ \item{stopAtFirst}{Logical. If TRUE, the default, the function stops at the first
+ database where the accession number is found.}
+ \item{\dots}{Arguments passed to the function \code{\link{choosebank}}.}
+}
+\value{
+The function returns invisibly a vector of strings of characters for the names of the ACNUC
+databases in which the accession number was found.
+}
+\references{\code{citation("seqinr")}}
+\author{J.R. Lobry}
+
+\seealso{\code{\link{choosebank}} to open a given ACNUC database.}
+\examples{
+ \dontrun{# Need internet connection
+ where.is.this.acc("NC_001416") # first found in phever2dna bank (2016-06-01)
+ }
+}
diff --git a/man/words.Rd b/man/words.Rd
new file mode 100644
index 0000000..d613cf4
--- /dev/null
+++ b/man/words.Rd
@@ -0,0 +1,49 @@
+\name{words}
+\alias{words}
+\title{ To get all words from an alphabet.}
+\description{
+Generates a vector of all the words from a given alphabet,
+with right positions varying faster, for instance if the
+\code{alphabet} is \code{c("0","1")} and the \code{length}
+is 2 you will obtain \code{c("00", "01", "10", "11")}.
+}
+\usage{
+words(length = 3, alphabet = s2c("acgt"))
+}
+\arguments{
+ \item{length}{ the number of characters in the words }
+ \item{alphabet}{ a vector of characters }
+}
+\value{
+A vector of strings with \code{length} characters each.
+}
+\author{J.R. Lobry}
+\references{
+ \code{citation("seqinr")}
+}
+\seealso{ \code{\link{kronecker}}, \code{\link{outer}} }
+\examples{
+#
+# Get all 64 codons:
+#
+stopifnot(all(words() ==
+c("aaa", "aac", "aag", "aat", "aca", "acc", "acg", "act", "aga", "agc", "agg",
+ "agt", "ata", "atc", "atg", "att","caa", "cac", "cag", "cat", "cca", "ccc",
+ "ccg", "cct", "cga", "cgc", "cgg", "cgt", "cta", "ctc", "ctg", "ctt", "gaa",
+ "gac", "gag", "gat", "gca", "gcc", "gcg", "gct", "gga", "ggc", "ggg", "ggt",
+ "gta", "gtc", "gtg", "gtt", "taa", "tac", "tag", "tat", "tca", "tcc", "tcg",
+ "tct", "tga", "tgc", "tgg", "tgt", "tta", "ttc", "ttg", "ttt")))
+#
+# Get all codons with u c a g for bases:
+#
+words(alphabet = s2c("ucag"))
+#
+# Get all tetranucleotides:
+#
+words(length = 4)
+#
+# Get all dipeptides:
+#
+words(length = 2, alphabet = a()[-1])
+}
+\keyword{ utilities }
diff --git a/man/words.pos.Rd b/man/words.pos.Rd
new file mode 100644
index 0000000..fdf9be8
--- /dev/null
+++ b/man/words.pos.Rd
@@ -0,0 +1,49 @@
+\name{words.pos}
+\alias{words.pos}
+\title{ Positions of possibly degenerated motifs within sequences }
+\description{
+\code{words.pos} searches all the occurrences of the motif \code{pattern}
+within the sequence \code{text} and returns their positions. This
+function is based on \code{regexp} allowing thus for complex motif searches.
+The main difference with \code{\link{gregexpr}} is that non disjoint matches
+are reported here.
+}
+\usage{
+words.pos(pattern, text, ignore.case = FALSE,
+ perl = TRUE, fixed = FALSE, useBytes = TRUE, ...)
+}
+\arguments{
+ \item{pattern}{character string containing a \link{regular expression} (or character string for \code{fixed = TRUE}) to be matched in the given character vector.}
+ \item{text}{ a character vector where matches are sought. }
+ \item{ignore.case}{if \code{FALSE}, the pattern matching is case sensitive and if \code{TRUE}, case is ignored during matching.}
+ \item{perl}{logical. Should perl-compatible regexps be used if available?
+Has priority over \code{extended}.}
+ \item{fixed}{logical. If \code{TRUE}, pattern is a string to be matched as is. Overrides all conflicting arguments.}
+ \item{useBytes}{logical. If \code{TRUE} the matching is done byte-by-byte rather than character-by-character.}
+ \item{...}{arguments passed to \code{\link{regexpr}}.}
+}
+\details{
+Default parameter values have been tuned for speed when working with biological sequences.
+}
+\value{
+a vector of positions for which the motif \code{pattern} was
+found in the sequence \code{text}.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{J.R. Lobry}
+\seealso{ \code{\link{regexpr}} }
+\examples{
+myseq <- "tatagaga"
+words.pos("t", myseq) # Should be 1 3
+words.pos("tag", myseq) # Should be 3
+words.pos("ga", myseq) # Should be 5 7
+# How to specify ambiguous base ? Look for YpR motifs by
+words.pos("[ct][ag]", myseq) # Should be 1 3
+#
+# Show the difference with gregexpr:
+#
+words.pos("toto", "totototo") # 1 3 5 (three overlapping matches)
+unlist(gregexpr("toto", "totototo")) # 1 5 (two disjoint matches)
+}
diff --git a/man/write.fasta.Rd b/man/write.fasta.Rd
new file mode 100644
index 0000000..4d5eaee
--- /dev/null
+++ b/man/write.fasta.Rd
@@ -0,0 +1,44 @@
+\name{write.fasta}
+\alias{write.fasta}
+\title{ Write sequence(s) into a file in fasta format }
+\description{
+ Writes one or more sequences into a file in FASTA format.
+}
+
+\usage{write.fasta(sequences, names, file.out, open = "w", nbchar = 60, as.string = FALSE)}
+
+\arguments{
+ \item{sequences}{ A DNA or protein sequence (in the form of a
+ vector of single characters by default) or a list of such sequences. }
+ \item{as.string}{ FALSE. When set to TRUE sequences are in the
+ form of strings instead of vectors of single characters. }
+ \item{names}{ The name(s) of the sequences. }
+ \item{nbchar}{ The number of characters per line (default: 60) }
+ \item{file.out}{ The name of the output file. }
+ \item{open}{ Mode to open the output file, use "w" to write into
+ a new file, use "a" to append at the end of an already existing file.}
+}
+\value{
+ none.
+}
+\references{
+ \code{citation("seqinr")}
+}
+\author{A. Necşulea}
+\seealso{ \code{\link{read.fasta}} }
+\examples{
+## Read 3 sequences from a FASTA file:
+ortho <- read.fasta(file = system.file("sequences/ortho.fasta", package =
+"seqinr"))
+
+## Select only third codon positions:
+ortho3 <- lapply(ortho, function(x) x[seq(from = 3, to = length(x), by = 3)])
+
+## Write the 3 modified sequences to a file:
+write.fasta(sequences = ortho3, names = names(ortho3), nbchar = 80, file.out = "ortho3.fasta")
+
+## Read them again from the same file and check that sequences are preserved:
+ortho3bis <- read.fasta("ortho3.fasta", set.attributes = FALSE)
+stopifnot(identical(ortho3bis, ortho3))
+}
+\keyword{ utilities }
diff --git a/man/zscore.Rd b/man/zscore.Rd
new file mode 100644
index 0000000..027575f
--- /dev/null
+++ b/man/zscore.Rd
@@ -0,0 +1,106 @@
+\name{dinucleotides}
+\alias{rho}
+\alias{zscore}
+\title{Statistical over- and under- representation of dinucleotides in a
+ sequence}
+\encoding{UTF-8}
+\description{
+ These two functions compute two different types of statistics for the
+ measure of statistical dinucleotide over- and under-representation :
+ the rho statistic, and the z-score, each computed for all 16 dinucleotides.
+}
+\usage{
+rho(sequence, wordsize = 2, alphabet = s2c("acgt"))
+zscore(sequence, simulations = NULL, modele, exact = FALSE, alphabet = s2c("acgt"), ... )
+}
+\arguments{
+ \item{sequence}{a vector of single characters.}
+ \item{wordsize}{an integer giving the size of word (n-mer) to consider.}
+ \item{simulations}{ If \code{NULL}, analytical solution is computed
+ when available (models \code{base} and \code{codon}). Otherwise, it
+ should be the number of permutations for the z-score computation }
+ \item{modele}{ A string of characters describing the model chosen for
+ the random generation }
+ \item{exact}{ Whether exact analytical calculation or an
+ approximation should be used }
+ \item{alphabet}{ A vector of single characters. }
+ \item{...}{ Optional parameters for specific model permutations are
+ passed on to \code{\link{permutation}} function. }
+}
+\details{
+ The \code{rho} statistic, as presented in Karlin S., Cardon LR. (1994), can
+ be computed on each of the 16 dinucleotides. It is the frequency of
+ dinucleotide \emph{xy} divided by the product of frequencies of
+ nucleotide \emph{x} and nucleotide \emph{y}. It is equal to 1.00 when
+ dinucleotide \emph{xy} is formed by pure chance, and it is superior
+ (respectively inferior) to 1.00 when dinucleotide \emph{xy} is over-
+ (respectively under-) represented. Note that if you want to reproduce
+ Karlin's results you have to compute the statistic from the sequence
+ concatenated with its inverted complement that is with something
+ like \code{rho(c(myseq, rev(comp(myseq))))}.
+
+ The \code{zscore} statistic, as presented in Palmeira, L., Guéguen, L.
+ and Lobry JR. (2006). The statistic is the normalization of the
+ \code{rho} statistic by its expectation and variance according to a
+ given random sequence generation model, and follows the
+ standard normal distribution. This statistic can be computed
+ with several models (cf. \code{\link{permutation}} for the description
+ of each of the models). We provide analytical calculus for two of
+ them: the \code{base} permutations model and the \code{codon}
+ permutations model.
+
+ The \code{base} model allows for random sequence generation by
+ shuffling (with/without replacement) of all bases in the sequence.
+ Analytical computations are available for this model: either as an
+ approximation for large sequences (cf. Palmeira, L., Guéguen, L.
+ and Lobry JR. (2006)), either as the exact analytical formulae
+ (cf. Schbath, S. (1995)).
+
+ The \code{position} model allows for random sequence generation
+ by shuffling (with/without replacement) of bases within their
+ position in the codon (bases in position I, II or III stay in
+ position I, II or III in the new sequence).
+
+ The \code{codon} model allows for random sequence generation by
+ shuffling (with/without replacement) of codons. Analytical
+ computation is available for this model (Gautier, C., Gouy, M. and
+ Louail, S. (1985)).
+
+ The \code{syncodon} model allows for random sequence generation
+ by shuffling (with/without replacement) of synonymous codons.
+}
+\value{
+ a table containing the computed statistic for each dinucleotide
+}
+\references{
+ Gautier, C., Gouy, M. and Louail, S. (1985) Non-parametric statistics
+ for nucleic acid sequence study. \emph{Biochimie}, \bold{67}:449-453.
+
+ Karlin S. and Cardon LR. (1994) Computational DNA sequence analysis.
+ \emph{Annu Rev Microbiol}, \bold{48}:619-654.
+
+ Schbath, S. (1995) Étude asymptotique du nombre d'occurrences d'un
+ mot dans une chaîne de Markov et application à la recherche de mots
+ de fréquence exceptionnelle dans les séquences d'ADN.
+ \emph{Thèse de l'Université René Descartes, Paris V}
+
+ Palmeira, L., Guéguen, L. and Lobry, J.R. (2006) UV-targeted dinucleotides
+ are not depleted in light-exposed Prokaryotic genomes.
+ \emph{Molecular Biology and Evolution},
+ \bold{23}:2214-2219.
+ \url{http://mbe.oxfordjournals.org/cgi/reprint/23/11/2214}
+
+ \code{citation("seqinr")}
+}
+\author{L. Palmeira, J.R. Lobry with suggestions from A. Coghlan.}
+\seealso{ \code{\link{permutation}} }
+\examples{
+\dontrun{
+sequence <- sample(x = s2c("acgt"), size = 6000, replace = TRUE)
+rho(sequence)
+zscore(sequence, modele = "base")
+zscore(sequence, modele = "base", exact = TRUE)
+zscore(sequence, modele = "codon")
+zscore(sequence, simulations = 1000, modele = "syncodon")
+}
+}
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..8801198
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,2 @@
+PKG_CFLAGS = -DUSE_TYPE_CHECKING_STRICT
+PKG_LIBS = -lz
diff --git a/src/Makevars.win b/src/Makevars.win
new file mode 100644
index 0000000..e5004ad
--- /dev/null
+++ b/src/Makevars.win
@@ -0,0 +1,2 @@
+PKG_CFLAGS = -DUSE_TYPE_CHECKING_STRICT
+# PKG_LIBS = -lz -lws2_32 -mwindows
diff --git a/src/alignment.c b/src/alignment.c
new file mode 100644
index 0000000..3ec6513
--- /dev/null
+++ b/src/alignment.c
@@ -0,0 +1,917 @@
+#include "alignment.h"
+
+
+/* Strip trailing blanks (space, tab, newline) from a NUL-terminated string, in place. */
+void rem_blank(char *string)
+{
+  int pos = strlen(string);
+
+  /* Walk backwards from the terminator, overwriting every blank with NUL,
+     and stop at the first character that is not a blank (or once the
+     whole string has been consumed). */
+  while(pos >= 0) {
+    char ch = string[pos];
+
+    if(ch != 0 && ch != '\n' && ch != ' ' && ch != '\t') break;
+    string[pos] = 0;
+    pos--;
+  }
+}
+/**************************** end rem_blank ************************/
+
+/*
+ * Release an alignment array.
+ *
+ * Frees the seq and com buffers of entries 0..nbsq (inclusive, i.e.
+ * nbsq + 1 entries), then the array itself.
+ */
+void free_mase(struct SEQMASE * aln, int nbsq)
+{
+  int idx = 0;
+
+  while(idx <= nbsq) {
+    struct SEQMASE *entry = &aln[idx];
+
+    free(entry->seq);
+    free(entry->com);
+    idx++;
+  }
+
+  free(aln);
+}
+
+/******************************** end free_mase ************************/
+
+
+/*
+ * Make room for one more sequence in three parallel pointer arrays.
+ *
+ * count1    index of the last sequence stored so far; pass -1 to start a
+ *           new set of arrays (the internal capacity counter is reset; any
+ *           arrays from a previous set are NOT freed here).
+ * pseq, pseqname, pcomments
+ *           addresses of the three arrays; updated when the arrays are
+ *           first allocated or are moved while growing.
+ *
+ * Returns the index to use for the new sequence (count1 + 1), or -1 if
+ * memory could not be obtained.  On failure the caller's pointers always
+ * remain valid: either untouched or pointing to the (possibly moved)
+ * arrays, each still able to hold at least the previous capacity.
+ */
+int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments)
+{
+  static int max_count;   /* capacity shared by the three arrays */
+  char **seq, **seqname, **comments;
+  int new_max;
+
+  if(count1 == -1) max_count = 0;
+
+  /* Still room in the current arrays. */
+  if(count1 + 1 < max_count) return count1 + 1;
+
+  count1++;
+  if(max_count == 0) {
+    /* First allocation: all three succeed or nothing is kept. */
+    new_max = 100;
+    seq = (char **)malloc(new_max * sizeof(char *));
+    seqname = (char **)malloc(new_max * sizeof(char *));
+    comments = (char **)malloc(new_max * sizeof(char *));
+    if(seq == NULL || seqname == NULL || comments == NULL) {
+      free(seq);
+      free(seqname);
+      free(comments);
+      return -1;
+    }
+    *pseq = seq; *pseqname = seqname; *pcomments = comments;
+  }
+  else {
+    /* Grow by a factor of 3.  Each successfully reallocated array is
+       written back at once: realloc may have moved it, so the old
+       pointer held by the caller would otherwise dangle if a later
+       step fails. */
+    new_max = 3 * max_count;
+    seq = (char **)realloc(*pseq, new_max * sizeof(char *));
+    if(seq == NULL) return -1;
+    *pseq = seq;
+    seqname = (char **)realloc(*pseqname, new_max * sizeof(char *));
+    if(seqname == NULL) return -1;
+    *pseqname = seqname;
+    comments = (char **)realloc(*pcomments, new_max * sizeof(char *));
+    if(comments == NULL) return -1;
+    *pcomments = comments;
+  }
+
+  /* Commit the new capacity only once all three arrays have it. */
+  max_count = new_max;
+  return count1;
+}
+
+/******************************** end one_more_seq_found ************************/
+
+/***********************************************************************************************************************/
+/* Reads a MASE file and returns a list (R object) containing the sequences, the comments and the species names.       */
+/***********************************************************************************************************************/
+
+
+/*
+ * Read an alignment file in MASE format.
+ *
+ * nomfic: R character vector; its first element is the path of the file.
+ *
+ * The file is read twice: a first pass counts the sequences and sizes the
+ * buffers, a second pass fills them.
+ *
+ * Returns an R list of length 5 of which the first four slots are set
+ * (C indices):
+ *   [0] integer, the number of sequences
+ *   [1] list of sequence names
+ *   [2] list of sequences, with spaces, tabs and newlines removed
+ *   [3] list of comments (the ';' lines preceding each sequence)
+ *
+ * Raises an R error if the file cannot be opened, if its first line does
+ * not start with ';', if a sequence name has MAXMNMASE - 1 characters or
+ * more, or if memory cannot be allocated.  The file and the C buffers are
+ * released before each of these errors is raised.
+ */
+SEXP read_mase(SEXP nomfic)
+{
+  char *fic_name;
+  FILE *fic;
+  struct SEQMASE *aln;
+  int nb_seq;
+  int lg_max = 0, lg, lgs, lgc;
+  char string[MAXSTRING + 1];
+  char c1, c2;
+  int i, ii, jj, kk = 0, numline, maxcom = 0;
+
+  SEXP listseq;
+  SEXP essai;
+  SEXP listcom;
+  SEXP listmn;
+  SEXP nombreseq;
+
+
+  /* Copy the R arguments into C variables */
+  fic_name = (char *) CHAR(STRING_ELT(nomfic, 0));
+
+  if((fic = fopen(fic_name, "r")) == NULL) {
+    error("Can't open file");
+  }
+
+  /******************************************************************/
+  /* First pass: count sequences, longest sequence, longest comment */
+  /******************************************************************/
+
+  c1 = 0;
+  nb_seq = 0;
+  lg = lgc = 0;
+  while(fgets(string, MAXSTRING, fic) != NULL) {
+    string[MAXSTRING] = 0;
+
+    lgs = strlen(string);
+
+    /* NOTE(review): despite the wording of the message, reading continues
+       after an over-long line; this behaviour is kept as it was. */
+    if(lgs >= (MAXSTRING - 1)) {
+      REprintf("\n Fatal Error. Too long line in alignment (> %d).\n", MAXSTRING);
+      REprintf("Increase MAXSTRING and recompile.\n");
+    }
+
+    c2 = string[0];
+
+    /* Comment line (";" but not ";;") */
+    if(string[0] == ';' && string[1] != ';') {
+      lgc += (lgs + 1);
+    }
+
+    /* First non-';' line after a ';' line: this is a sequence name */
+    if(c1 == ';' && c2 != c1) {
+      nb_seq++;
+      if(lg > lg_max) lg_max = lg;
+      if(lgc > maxcom) maxcom = lgc;
+      lg = lgc = 0;
+    }
+    else if(c2 != ';') lg += lgs;
+    c1 = c2;
+  }
+  if(lg > lg_max) lg_max = lg;
+  /* Comment lines found after the last sequence are appended to the extra
+     entry aln[nb_seq] during the second pass, so they must be taken into
+     account when sizing the comment buffers. */
+  if(lgc > maxcom) maxcom = lgc;
+
+
+  /******************************************/
+  /* Create the R objects to be returned    */
+  /******************************************/
+
+  PROTECT(listseq=allocVector(VECSXP,nb_seq));
+  PROTECT(essai=allocVector(VECSXP,5));
+  PROTECT(listcom=allocVector(VECSXP,nb_seq));
+  PROTECT(listmn=allocVector(VECSXP,nb_seq));
+  PROTECT(nombreseq=NEW_INTEGER(1));
+
+
+  /* One entry more than the number of sequences: the last one receives
+     the comments that follow the last sequence, if any. */
+  aln = (struct SEQMASE *) calloc(nb_seq + 1, sizeof(struct SEQMASE));
+  if(aln == NULL) {
+    fclose(fic);
+    error("Not enough memory to read the alignment");
+  }
+
+  for(ii = 0; ii <= nb_seq; ii++) {
+    aln[ii].seq = (char *) calloc(lg_max + 1, sizeof(char));
+    aln[ii].com = (char *) calloc(maxcom + 1, sizeof(char));
+    if(aln[ii].seq == NULL || aln[ii].com == NULL) {
+      /* Entries not reached yet were zeroed by calloc, so free_mase can
+         safely walk over the whole array. */
+      fclose(fic);
+      free_mase(aln, nb_seq);
+      error("Not enough memory to read the alignment");
+    }
+  }
+
+
+  /******************************************/
+  /* Second pass: fill the buffers          */
+  /******************************************/
+
+  rewind(fic);
+
+  numline = 0;
+  ii = -1;
+  c1 = 0;
+  while(fgets(string, MAXSTRING, fic) != NULL) {
+    numline++;
+    string[MAXSTRING] = 0;
+    if((string[0] != ';') && (numline == 1)) {
+      /* error() does not return: release everything first */
+      fclose(fic);
+      free_mase(aln, nb_seq);
+      error("Not a MASE file"); /* check format, thanks to J.H. Troesemeier */
+    }
+
+    c2 = string[0];
+
+    /* Comment lines belong to the sequence that follows them */
+    if(string[0] == ';' && string[1] != ';') {
+      strcat(aln[ii + 1].com, string);
+    }
+
+    if(c1 == ';' && c2 != c1) {
+      /* Sequence name line */
+      ii++;
+      kk = aln[ii].lg = 0;
+
+      rem_blank(string);
+
+      if((int) strlen(string) >= (MAXMNMASE - 1)) {
+        REprintf("Error. Maximum sequance name is %d characters\n", MAXMNMASE);
+        fclose(fic);
+        free_mase(aln, nb_seq);
+        error("sequence name too long!");
+      }
+
+      strcpy(aln[ii].mn, string);
+    }
+    else if(c2 != ';') {
+      /* Sequence data line: keep everything but blanks */
+      for(jj = 0; jj < MAXSTRING; jj++) {
+        if(string[jj] == 0) break;
+        if(string[jj] == ' ') continue;
+        if(string[jj] == '\n') continue;
+        if(string[jj] == '\t') continue;
+        aln[ii].seq[kk++] = string[jj];
+        aln[ii].lg = kk;
+      }
+    }
+    c1 = c2;
+  }
+
+  fclose(fic);
+
+
+  /******************************************/
+  /* Copy the C buffers into the R objects  */
+  /******************************************/
+
+  INTEGER(nombreseq)[0]=(int)nb_seq;
+
+  for(i=0;i<nb_seq;i++){
+    SET_ELEMENT(listseq,i,mkChar(aln[i].seq));
+  }
+
+  for(i=0;i<nb_seq;i++){
+    SET_ELEMENT(listcom,i,mkChar(aln[i].com));
+  }
+
+  for(i=0;i<nb_seq;i++){
+    SET_ELEMENT(listmn,i,mkChar(aln[i].mn));
+  }
+
+  SET_ELEMENT(essai,0,nombreseq);
+  SET_ELEMENT(essai,1,listmn);
+  SET_ELEMENT(essai,2,listseq);
+  SET_ELEMENT(essai,3,listcom);
+
+  free_mase(aln,nb_seq);
+  UNPROTECT(5);
+
+  return(essai);
+
+}
+
+
+/********************** end read_mase ****************************/
+
+
+
+
+/*************************************************************************/
+/* Compute distance between two aligned sequences using different matrix */
+/*************************************************************************/
+
+
+SEXP distance(SEXP sequences,SEXP nbseq, SEXP matNumber, SEXP seqtype, SEXP gapoption){
+
+ SEXP d;
+ int MAXNSEQS;
+ char **seq;
+ int gap_option;
+ int i, j, k, n,totseqs, seq_long, nbases;
+ int mat_number, seq_type;
+ int **ndiff;
+ double **dist;
+
+ int mat_pos[] = { 17, -1, 15, 0, 1, 12, 18, 4, 9, -1, 2, 10, 16, 5, -1, 19, 6, 3, 7, 8, -1, 11, 13, -1, 14, -1 };
+
+ const char DNA[] = "ACGTXN-";
+ const char Prot[] = "DEKRHNQSTILVFWYCMAGPX*-";
+ int matp[20][20] = { {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1/3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 1/3, 1/3, 1/3, 2/3, 2/3, 1, 1, 1 },
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 2/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 2/3, 1/3, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3, 1/3, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/3} };
+
+
+
+
+ MAXNSEQS = INTEGER_VALUE(nbseq);
+ totseqs = INTEGER_VALUE(nbseq);
+ mat_number= INTEGER_VALUE(matNumber);
+ seq_type = INTEGER_VALUE(seqtype);
+ gap_option = INTEGER_VALUE(gapoption);
+
+ PROTECT(d=NEW_NUMERIC(totseqs*totseqs));
+
+ seq = (char **)malloc(totseqs*sizeof(char *));
+
+ for(i=0;i<totseqs;i++){
+ seq[i] = (char *) CHAR(STRING_ELT(sequences,i));
+ }
+
+ ndiff = (int **)malloc(MAXNSEQS*sizeof(int *));
+
+ for(j=0; j < MAXNSEQS; j++){
+ ndiff[j] = (int *)malloc(MAXNSEQS*sizeof(int));
+ }
+
+ dist = (double **)malloc(MAXNSEQS*sizeof(double *));
+
+ for(j=0; j < MAXNSEQS; j++){
+ dist[j] = (double *)malloc(MAXNSEQS*sizeof(double));
+ }
+
+
+
+ /*********************************************************/
+ /* Computing distance between sequences i and j */
+ /*********************************************************/
+
+ seq_long = (int)strlen(seq[0]);
+
+ for (i = 0; i < totseqs; i++)
+ {
+ dist[i][i] = 0.0;
+ }
+
+ for (i = 0; i < totseqs; i++)
+ {
+ for (j = 0; j < i; j++)
+ {
+ ndiff[j][i] = ndiff[i][j] = 0;
+ nbases = 0;
+ for (k = 0; k < seq_long; k++)
+ {
+
+ if(seq_type == 1){
+
+ /***************************/
+ /* DNA/RNA sequences */
+ /***************************/
+ if ((strchr(DNA, seq[i][k]) == NULL) || (strchr(DNA, seq[j][k]) == NULL))
+ seq[i][k] = seq[j][k] = 'X';
+ if (seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X' &&
+ seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
+ {
+ nbases++;
+ if (seq[i][k] != seq[j][k])
+ {
+ ndiff[j][i]++;
+ ndiff[i][j]++;
+ }
+ }
+
+ if (gap_option == 1)
+ {
+ if (seq[i][k] == '-' && seq[j][k] != '-' && seq[j][k] != 'N' && seq[j][k] != 'X')
+ {
+ nbases++;
+ ndiff[j][i]++;
+ ndiff[i][j]++;
+ }
+ if (seq[j][k] == '-' && seq[i][k] != '-' && seq[i][k] != 'N' && seq[i][k] != 'X')
+ {
+ nbases++;
+ ndiff[j][i]++;
+ ndiff[i][j]++;
+ }
+
+ }
+ }
+
+ else if(mat_number == 1 && seq_type == 0){
+
+ /********************************************/
+ /*Protein sequences with similarity matrix */
+ /********************************************/
+
+ if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
+ seq[i][k] = seq[j][k] = 'X';
+ if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
+ seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
+ {
+ nbases++;
+ ndiff[i][j] = ndiff[i][j] + matp[mat_pos[seq[i][k] - 65]][mat_pos[seq[j][k] - 65]];
+ ndiff[j][i] = ndiff[i][j];
+
+ }
+ }
+
+ else if( mat_number == 2 && seq_type == 0){
+
+ /********************************************/
+ /* Protein sequences with identity matrix */
+ /********************************************/
+
+ if ((strchr(Prot, seq[i][k]) == NULL) || (strchr(Prot, seq[j][k]) == NULL))
+ seq[i][k] = seq[j][k] = 'X';
+ if (seq[i][k] != '-' && seq[i][k] != '*' && seq[i][k] != 'X' &&
+ seq[j][k] != '-' && seq[j][k] != '*' && seq[j][k] != 'X')
+ {
+ nbases++;
+ if (seq[i][k] != seq[j][k])
+ {
+ ndiff[j][i]++;
+ ndiff[i][j]++;
+ }
+ }
+
+ }
+
+ dist[i][j] = dist[j][i] = sqrt((double)ndiff[i][j]/nbases);
+ }
+ }
+ }
+
+
+ /********************************************************************************/
+ /* Filling the R object (matrix of size nb_seq * nb_seq) with dist */
+ /********************************************************************************/
+
+ n=0;
+
+ for(i=0;i<totseqs;i++){
+ for(j=0;j<totseqs;j++){
+ REAL(d)[n+j]=dist[i][j];
+ }
+ n=n+totseqs;
+ }
+
+ UNPROTECT(1);
+
+ return(d);
+}
+
+/****************************************/
+/* Reading an alignment in MSF format */
+/***************************************/
+
+/*
+ * Read a multiple sequence alignment stored in MSF format.
+ *
+ * ficname: R character vector; its first element is the path of the file.
+ *
+ * Returns a list of three elements: the number of " Name: " header lines
+ * counted before the "//" separator, the list of sequence names and the
+ * list of sequences (the gap character '.' is rewritten as '-').
+ *
+ * An R error is raised when the file cannot be opened, when no "MSF: " line
+ * or no usable "Name: ... Len: ..." line is found, or when the file holds
+ * more names or more residues than its header announces.
+ *
+ * Working storage comes from R_alloc() so that R reclaims it at the end of
+ * the call, even after an error. error() does not return to the caller, so
+ * the file is closed before each call to it.
+ */
+SEXP read_msf_align(SEXP ficname)
+{
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  const char *fname;
+  FILE *in;
+  char line[100], *p, *q, *limit;
+  int i, l, curr_spec, curr_len, tot_spec, nb_announced, wid_1_line, wid_block;
+  char **seq, **seqname;
+  int *seqlen;
+
+  fname = CHAR(STRING_ELT(ficname, 0));
+
+  in = fopen(fname, "r");
+  if(in == NULL) {
+    error("File not found");
+  }
+
+  /* First pass: count the sequences announced before the "//" separator */
+  nb_announced = 0;
+  while(fgets(line, sizeof(line), in) != NULL) {
+    if(strncmp(line, "//", 2) == 0) break;
+    if(strncmp(line, " Name: ", 7) == 0) nb_announced++;
+  }
+  rewind(in);
+
+  seq = (char **) R_alloc(nb_announced, sizeof(char *));
+  seqname = (char **) R_alloc(nb_announced, sizeof(char *));
+  seqlen = (int *) R_alloc(nb_announced, sizeof(int));
+
+  /* Skip everything up to the line holding the "MSF: " keyword */
+  p = NULL;
+  while(fgets(line, sizeof(line), in) != NULL) {
+    if((p = strstr(line, "MSF: ")) != NULL) break;
+  }
+  if(p == NULL) goto bad_format;
+
+  /* Header: one "Name: <name> ... Len: <length>" line per sequence */
+  tot_spec = -1;
+  do {
+    if(!fgets(line, sizeof(line), in))
+      break;
+    if((p = strstr(line, "Name:")) == NULL) continue;
+    q = strstr(p, " Len: ");
+    if(q == NULL || sscanf(q + 5, "%d", &l) != 1 || l < 0) goto bad_format;
+    /* never store more names than the first pass made room for */
+    if(tot_spec + 1 >= nb_announced) goto bad_format;
+    tot_spec++;
+    seqlen[tot_spec] = l;
+    seq[tot_spec] = (char *) R_alloc(l + 1, sizeof(char));
+    memset(seq[tot_spec], 0, l + 1);
+    p += 5; while(*p == ' ') p++;
+    q = p; while(*q != ' ' && *q != '\0') q++;
+    l = q - p;
+    seqname[tot_spec] = (char *) R_alloc(l + 1, sizeof(char));
+    memcpy(seqname[tot_spec], p, l); seqname[tot_spec][l] = 0;
+  }
+  while(strncmp(line, "//", 2) != 0);
+  if(tot_spec < 0) goto bad_format;
+
+  /* Sequence blocks: lines are matched to the expected name, in order */
+  curr_spec = 0; curr_len = 0; wid_block = 0;
+  while(fgets(line, sizeof(line), in) != NULL) {
+    p = line; while(*p == ' ') p++;
+    l = strlen(seqname[curr_spec]);
+    if(strncmp(p, seqname[curr_spec], l) != 0) continue;
+    p += l;
+    limit = seq[curr_spec] + seqlen[curr_spec];
+    q = seq[curr_spec] + curr_len;
+    if(q > limit) goto too_long;
+    for( ; *p != '\n' && *p != '\0'; p++) {
+      if(*p == ' ' || *p == '\r') continue; /* '\r' only comes from CRLF line ends */
+      if(q >= limit) goto too_long;
+      *(q++) = (*p == '.') ? '-' : *p;
+    }
+    *q = 0;
+    wid_1_line = q - (seq[curr_spec] + curr_len);
+    wid_block = (wid_1_line > wid_block ? wid_1_line : wid_block);
+    if(curr_spec == tot_spec) {
+      /* block finished: every sequence advances by the widest line seen */
+      curr_len += wid_block;
+      curr_spec = 0;
+      wid_block = 0;
+    }
+    else curr_spec++;
+  }
+  fclose(in);
+
+  /* R objects are built once the file is closed */
+  PROTECT(nombreseq = NEW_INTEGER(1));
+  PROTECT(list = allocVector(VECSXP, 3));
+  INTEGER(nombreseq)[0] = nb_announced;
+  PROTECT(listname = allocVector(VECSXP, nb_announced));
+  PROTECT(listseq = allocVector(VECSXP, nb_announced));
+
+  for(i = 0; i < tot_spec + 1; i++) {
+    SET_ELEMENT(listname, i, mkChar(seqname[i]));
+    SET_ELEMENT(listseq, i, mkChar(seq[i]));
+  }
+
+  SET_ELEMENT(list, 0, nombreseq);
+  SET_ELEMENT(list, 1, listname);
+  SET_ELEMENT(list, 2, listseq);
+
+  UNPROTECT(4);
+  return list;
+
+ too_long:
+  fclose(in);
+  error("Sequence data longer than announced in MSF header");
+ bad_format:
+  fclose(in);
+  error("File not in MSF format!");
+  return R_NilValue; /* not reached */
+}
+
+
+/******************************************/
+/* Reading an alignment in PHYLIP format */
+/******************************************/
+
+
+/*
+ * Read an alignment stored in PHYLIP format (first block with names in the
+ * first PHYNAME columns, optional following blocks separated by one line).
+ *
+ * ficname: R character vector; its first element is the path of the file.
+ *
+ * Returns a list of three elements: the number of sequences read from the
+ * first line of the file, the list of names and the list of sequences.
+ *
+ * An R error is raised when the file cannot be opened, when its first line
+ * does not hold two integers (at least one sequence, non-negative length),
+ * when the file ends too early, or when a sequence holds more residues than
+ * the announced length.
+ *
+ * Working storage comes from R_alloc() so that R reclaims it at the end of
+ * the call, even after an error. error() does not return to the caller, so
+ * the file is closed before each call to it.
+ */
+SEXP read_phylip_align(SEXP ficname)
+{
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  const char *fname;
+  FILE *in;
+  char *p, *q, line[PHYNAME + 200];
+  char **seq, **seqname;
+  int totseqs, lenseqs, i, l;
+
+  fname = CHAR(STRING_ELT(ficname, 0));
+
+  in = fopen(fname, "r");
+  if(in == NULL) {
+    error("file not found");
+  }
+
+  if(!fgets(line, sizeof(line), in) ||
+     sscanf(line, "%d%d", &totseqs, &lenseqs) != 2 ||
+     totseqs < 1 || lenseqs < 0) {
+    fclose(in);
+    error("Not a PHYLIP file");
+  }
+
+  seq = (char **) R_alloc(totseqs, sizeof(char *));
+  seqname = (char **) R_alloc(totseqs, sizeof(char *));
+  for(i = 0; i < totseqs; i++) {
+    seq[i] = (char *) R_alloc(lenseqs + 1, sizeof(char));
+    memset(seq[i], 0, lenseqs + 1);
+    seqname[i] = (char *) R_alloc(PHYNAME + 1, sizeof(char));
+  }
+
+  /* First block: name in the first PHYNAME columns, then residues */
+  q = NULL;
+  for(i = 0; i < totseqs; i++) {
+    if(!fgets(line, sizeof(line), in)) goto eof;
+    if(strlen(line) < PHYNAME) goto eof; /* no room for a name field */
+    memcpy(seqname[i], line, PHYNAME); seqname[i][PHYNAME] = 0;
+    q = seq[i];
+    for(p = line + PHYNAME; *p != '\n' && *p != '\0'; p++) {
+      if(*p == ' ' || *p == '\r') continue; /* '\r' only comes from CRLF line ends */
+      if(q - seq[i] >= lenseqs) goto eof;   /* more residues than announced */
+      *(q++) = *p;
+    }
+  }
+  /* the length read so far is taken from the last sequence */
+  l = q - seq[totseqs - 1];
+
+  /* Following blocks: one separator line, then one line per sequence */
+  while(l < lenseqs) {
+    if(!fgets(line, sizeof(line), in)) goto eof;
+    for(i = 0; i < totseqs; i++) {
+      if(!fgets(line, sizeof(line), in)) goto eof;
+      q = seq[i] + l;
+      for(p = line; *p != '\n' && *p != '\0'; p++) {
+        if(*p == ' ' || *p == '\r') continue;
+        if(q - seq[i] >= lenseqs) goto eof;
+        *(q++) = *p;
+      }
+    }
+    l = q - seq[totseqs - 1];
+  }
+  for(i = 0; i < totseqs; i++) seq[i][l] = 0;
+  fclose(in);
+
+  /* R objects are built once the file is closed */
+  PROTECT(nombreseq = NEW_INTEGER(1));
+  PROTECT(list = allocVector(VECSXP, 3));
+  INTEGER(nombreseq)[0] = totseqs;
+  PROTECT(listname = allocVector(VECSXP, totseqs));
+  PROTECT(listseq = allocVector(VECSXP, totseqs));
+
+  for(i = 0; i < totseqs; i++) {
+    SET_ELEMENT(listname, i, mkChar(seqname[i]));
+    SET_ELEMENT(listseq, i, mkChar(seq[i]));
+  }
+
+  SET_ELEMENT(list, 0, nombreseq);
+  SET_ELEMENT(list, 1, listname);
+  SET_ELEMENT(list, 2, listseq);
+
+  UNPROTECT(4);
+  return list;
+
+ eof:
+  fclose(in);
+  error("Unexpected EOF/Corrupt File");
+  return R_NilValue; /* not reached */
+}
+
+
+
+/*************************************/
+/* Reading alignment in fasta format */
+/*************************************/
+
+/*
+ * Read an alignment stored in FASTA format.
+ *
+ * ficname: R character vector; its first element is the path of the file.
+ *
+ * Returns a list of three elements: the number of records (lines starting
+ * with '>'), the list of names (header text after '>') and the list of
+ * sequences (the following lines concatenated, with the end of line and
+ * trailing blanks removed).
+ *
+ * An R error is raised when the file cannot be opened, is empty, or does
+ * not start with '>'.
+ *
+ * The file is read in chunks of at most sizeof(line)-1 characters. The flag
+ * bol records whether the next chunk starts at the beginning of a line, so
+ * that a header longer than one chunk is neither counted twice nor copied
+ * into the sequence; the name keeps only the first chunk of such a header.
+ */
+SEXP read_fasta_align(SEXP ficname)
+{
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  const char *fname;
+  FILE *in;
+  int totseqs, lseq, l2, l, lenseqs, bol;
+  char line[200], *p, *i;
+  char **seq, **seqname;
+
+  fname = CHAR(STRING_ELT(ficname, 0));
+
+  PROTECT(nombreseq = NEW_INTEGER(1));
+  PROTECT(list = allocVector(VECSXP, 3));
+
+  /* Check that the file is available */
+
+  if((in = fopen(fname, "r")) == NULL) error("File not found");
+
+  /* How many sequences are in the file ? */
+
+  totseqs = 0;
+  bol = 1;
+  while(fgets(line, sizeof(line), in) != NULL) {
+    if(bol && *line == '>') totseqs++;
+    l2 = strlen(line);
+    bol = (l2 > 0 && line[l2 - 1] == '\n');
+  }
+  rewind(in);
+
+  /* R objects creation */
+
+  INTEGER(nombreseq)[0] = totseqs;
+  PROTECT(listname = allocVector(VECSXP, totseqs));
+  PROTECT(listseq = allocVector(VECSXP, totseqs));
+
+  /* Memory allocation: transient storage reclaimed by R after the call */
+
+  seq = (char **) R_alloc(totseqs, sizeof(char *));
+  seqname = (char **) R_alloc(totseqs, sizeof(char *));
+
+  lenseqs = MAXLENSEQ;
+  totseqs = -1;
+  i = fgets(line, sizeof(line), in);
+  if(i == NULL || line[0] != '>') {
+    /* error() does not return: close the file first */
+    fclose(in);
+    error("File not in Fasta format!\n");
+  }
+
+  /* Main loop to read line by line the file */
+
+  while( i != NULL ){
+    totseqs++;
+
+    /* name: header text up to the end of the line or of the chunk */
+    p = line + 1;
+    while(*p != '\n' && *p != '\0')
+      p++;
+    bol = (*p == '\n');
+    l = p - line - 1;
+    while(l > 0 && line[l] == '\r') /* CRLF line end */
+      l--;
+
+    seqname[totseqs] = (char *) R_alloc(l + 1, sizeof(char));
+
+    memcpy(seqname[totseqs], line + 1, l);
+    seqname[totseqs][l] = '\0';
+    SET_ELEMENT(listname, totseqs, mkChar(seqname[totseqs]));
+
+    /* drop the remaining chunks of a header longer than the buffer */
+    while( !bol && (i = fgets(line, sizeof(line), in)) != NULL ) {
+      l2 = strlen(line);
+      bol = (l2 > 0 && line[l2 - 1] == '\n');
+    }
+
+    seq[totseqs] = (char *) R_alloc(lenseqs + 1, sizeof(char));
+    lseq = 0;
+
+    while( (i = fgets(line, sizeof(line), in)) != NULL && !(bol && *i == '>') ) {
+      l2 = strlen(line);
+      bol = (l2 > 0 && line[l2 - 1] == '\n');
+      if(bol) l2--;
+      while(l2 > 0 && (line[l2 - 1] == ' ' || line[l2 - 1] == '\r'))
+        l2--;
+      if(lseq + l2 > lenseqs) {
+        /* grow by one MAXLENSEQ step; a chunk is always shorter than that */
+        char *temp;
+        lenseqs += MAXLENSEQ;
+        temp = R_alloc(lenseqs + 1, sizeof(char));
+        memcpy(temp, seq[totseqs], lseq);
+        seq[totseqs] = temp;
+      }
+      memcpy(seq[totseqs] + lseq, line, l2);
+      lseq += l2;
+    }
+    seq[totseqs][lseq] = '\0';
+    SET_ELEMENT(listseq, totseqs, mkChar(seq[totseqs]));
+  }
+
+  SET_ELEMENT(list, 0, nombreseq);
+  SET_ELEMENT(list, 1, listname);
+  SET_ELEMENT(list, 2, listseq);
+
+  fclose(in);
+  UNPROTECT(4);
+  return list;
+}
+
+
+
+/*******************************************/
+/* Reading an alignment in CLUSTAL format */
+/*******************************************/
+
+
+
+/*
+ * Release the heap storage built by read_clustal_align(). last is the index
+ * of the last sequence in use (-1 when there is none).
+ */
+static void free_clustal_storage(char **seq, char **seqname, char **comments, int last)
+{
+  int i;
+
+  if(seq != NULL && seqname != NULL) {
+    for(i = 0; i <= last; i++) {
+      free(seq[i]);
+      free(seqname[i]);
+    }
+  }
+  free(seq);
+  free(seqname);
+  free(comments);
+}
+
+/*
+ * Read an alignment stored in CLUSTAL format.
+ *
+ * ficname: R character vector; its first element is the path of the file.
+ *
+ * Returns a list of three elements: the number of sequences, the list of
+ * names and the list of sequences.
+ *
+ * An R error is raised when the file cannot be opened, does not start with
+ * "CLUSTAL", ends right after the header, has sequence lines that cannot be
+ * split into a name and residues, or when memory runs out.
+ *
+ * error() does not return to the caller, so the file is closed and the heap
+ * storage released before each call to it. The three arrays are grown by
+ * one_more_seq_found(), which is defined outside this function.
+ */
+SEXP read_clustal_align(SEXP ficname)
+{
+  SEXP list;
+  SEXP listseq;
+  SEXP listname;
+  SEXP nombreseq;
+  const char *fname;
+  const char *errmsg;
+  FILE *in;
+  char line[200], *p;
+  int i, k, l = 0, curr_spec, first=TRUEL, curr_len, next_len = 0, tot_spec = -1, curr_max_len =0, carac, wid_name = 0;
+  char **seq = NULL, **comments = NULL, **seqname = NULL;
+
+  fname = CHAR(STRING_ELT(ficname,0));
+
+  PROTECT(nombreseq=NEW_INTEGER(1));
+  PROTECT(list=allocVector(VECSXP,3));
+
+  in=fopen(fname,"r");
+
+  if(in==NULL) {
+    error("file not found");
+  }
+
+  if(!fgets(line,sizeof(line),in) || strncmp(line,"CLUSTAL",7) != 0) { /* skip 1st line with CLUSTAL in it */
+    errmsg = "File not in CLUSTAL format!";
+    goto fail;
+  }
+
+  /* skip next empty lines */
+  do {
+    carac = getc(in);
+    if(carac == ' ') {
+      if(!fgets(line,sizeof(line),in)) {
+        errmsg = "Unexpected EOF";
+        goto fail;
+      }
+      carac = getc(in);
+    }
+  }
+  while(carac == '\n' || carac == '\r');
+  ungetc(carac, in); /* back to start of 1st non-empty line */
+  tot_spec = curr_spec = -1; curr_len = next_len = 0;
+  while( fgets(line, sizeof(line), in) != NULL ) {
+    if(*line == '\n' || *line == ' ') {
+      /* blank or consensus line: the current block is over */
+      curr_spec = -1;
+      curr_len = next_len;
+      first = FALSE;
+      continue;
+    }
+    else if(tot_spec >= 0 && curr_spec == -1 &&
+            strncmp(line, seqname[0], strlen(seqname[0]) ) != 0) {
+      /* a new block must start with the first sequence name */
+      break;
+    }
+    else {
+      if(first) {
+        curr_spec = one_more_seq_found(curr_spec, &seq, &seqname, &comments);
+        if(curr_spec == -1) {
+          /* the state of the three arrays after a failure of the helper
+             cannot be told from here, so they are not freed */
+          fclose(in);
+          error("Not enough memory!");
+        }
+      }
+      else {
+        curr_spec++;
+        if(curr_spec > tot_spec) {
+          /* more lines in this block than sequences in the first one */
+          errmsg = "File not in CLUSTAL format!";
+          goto fail;
+        }
+      }
+    }
+
+    if(first && curr_spec == 0) {
+      /* width of the name part: step back over trailing blanks and digits,
+         then over the residues, down to the blank that precedes them */
+      k = (int)strlen(line) - 2;
+      while(k > 0 && (line[k] == ' ' || isdigit((unsigned char)line[k])) ) k--;
+      while(k > 0 && line[k] != ' ') k--;
+      if(k <= 0) {
+        errmsg = "File not in CLUSTAL format!";
+        goto fail;
+      }
+      wid_name = k + 1;
+    }
+
+    if(first) {
+      seqname[curr_spec] = (char *)malloc(wid_name+1);
+      if(seqname[curr_spec]==NULL) {
+        goto nomem;
+      }
+      memcpy(seqname[curr_spec], line, wid_name);
+      p = seqname[curr_spec] + wid_name - 1;
+      while(*p==' ') p--;
+      *(p+1)=0;
+      if(curr_spec > tot_spec) tot_spec = curr_spec;
+      seq[curr_spec] = (char *)malloc(CLU_BLOCK_LEN+1);
+      curr_max_len = CLU_BLOCK_LEN;
+      if(seq[curr_spec]==NULL) {
+        goto nomem;
+      }
+      comments[curr_spec] = NULL;
+    }
+    if(curr_spec == 0) {
+      /* number of residues on the lines of this block, measured on the
+         first sequence once trailing blanks and digits are removed */
+      l = (int)strlen(line) - 1;
+      while(l > 0 && (line[l - 1] == ' ' || isdigit((unsigned char)line[l - 1])) ) l--;
+      l -= wid_name;
+      if(l < 0) {
+        errmsg = "File not in CLUSTAL format!";
+        goto fail;
+      }
+      if(curr_len + l > curr_max_len) {
+        curr_max_len += CLU_BLOCK_LEN;
+        for(i=0; i<=tot_spec; i++) {
+          p = (char *)malloc(curr_max_len+1);
+          if(p == NULL) goto nomem;
+          memcpy(p, seq[i], curr_len);
+          free(seq[i]);
+          seq[i] = p;
+        }
+      }
+      next_len = curr_len + l;
+    }
+    memcpy(seq[curr_spec]+curr_len, line + wid_name, l);
+  }
+  fclose(in);
+
+  for(i=0; i<=tot_spec; i++) seq[i][next_len] = 0;
+
+  INTEGER(nombreseq)[0]=tot_spec+1;
+
+  PROTECT(listname=allocVector(VECSXP,tot_spec+1));
+  PROTECT(listseq=allocVector(VECSXP,tot_spec+1));
+
+  for(i=0; i<tot_spec+1; i++) {
+    SET_ELEMENT(listname,i,mkChar(seqname[i]));
+    SET_ELEMENT(listseq,i,mkChar(seq[i]));
+  }
+
+  SET_ELEMENT(list,0,nombreseq);
+  SET_ELEMENT(list,1,listname);
+  SET_ELEMENT(list,2,listseq);
+
+  /* mkChar() copied the text, the C buffers are no longer needed */
+  free_clustal_storage(seq, seqname, comments, tot_spec);
+
+  UNPROTECT(4);
+  return list;
+
+ nomem:
+  errmsg = "Not enough memory!";
+ fail:
+  fclose(in);
+  free_clustal_storage(seq, seqname, comments, tot_spec);
+  error("%s", errmsg);
+  return R_NilValue; /* not reached */
+}
diff --git a/src/alignment.h b/src/alignment.h
new file mode 100644
index 0000000..df4ec11
--- /dev/null
+++ b/src/alignment.h
@@ -0,0 +1,33 @@
+#include <Rinternals.h>
+#include <R.h>
+#include <Rdefines.h>
+#include <ctype.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+
+#define FALSE 0
+#define TRUEL (!FALSE) /* evaluates to 1; local spelling of "true" */
+#define MAXLENCOM 50000 /* max length of comments in mase format */
+#define MAX_SPECIES_SETS 50 /* max number of species sets */
+#define PHYNAME 10 /* width of the name field at the start of a PHYLIP sequence line, see read_phylip_align() */
+#define CLU_WID_NAME 16 /* presumably the name width in CLUSTAL files - TODO confirm where it is used */
+#define MSF_WID_NAME 15 /* presumably the name width in MSF files - TODO confirm where it is used */
+#define CLU_BLOCK_LEN 5000 /* block size for memory allocation, Clustal format */
+#define MAX_GAP_SITES 1000
+#define MAXLENSEQ 10000 /* initial size and growth step of the sequence buffer in read_fasta_align() */
+#define MAXMNMASE 30 /* size of the mn field of struct SEQMASE */
+#define MAXSTRING 10000
+
+struct SEQMASE /* one sequence record; presumably for the mase format - TODO confirm against its users */
+{
+ char mn[MAXMNMASE]; /* presumably the sequence name */
+ char *com; /* presumably the comment text attached to the sequence */
+ char *seq; /* presumably the sequence itself */
+ int lg; /* presumably the sequence length */
+};
diff --git a/src/fastacc.c b/src/fastacc.c
new file mode 100644
index 0000000..0b0f778
--- /dev/null
+++ b/src/fastacc.c
@@ -0,0 +1,39 @@
+#include <R.h>
+#include <Rdefines.h>
+
+
+SEXP fastacc(SEXP bits_in_char, SEXP target, SEXP database, SEXP noc, SEXP n){
+  /*
+   * For each of the n consecutive records of noc bytes held in database,
+   * AND every byte with the byte of target at the same offset, look the
+   * result up in bits_in_char and add the values up.
+   *
+   * bits_in_char: integer table indexed by a byte value (presumably the
+   *               number of bits set in that byte - confirm with caller)
+   * target:       raw vector, noc bytes are read
+   * database:     raw vector, n * noc bytes are read
+   * noc, n:       integers, only the first element of each is used
+   *
+   * Returns an integer vector of length n holding one sum per record. The
+   * vector is left unfilled when noc is not positive.
+   */
+  SEXP res;
+  const int *table;
+  const unsigned char *mask, *records, *rec;
+  int *sums;
+  int width, count, r, k, acc;
+
+  /* coerce every argument and keep the coerced objects alive */
+  PROTECT(bits_in_char = AS_INTEGER(bits_in_char));
+  PROTECT(target = AS_RAW(target));
+  PROTECT(database = AS_RAW(database));
+  PROTECT(noc = AS_INTEGER(noc));
+  PROTECT(n = AS_INTEGER(n));
+
+  table = INTEGER_POINTER(bits_in_char);
+  mask = RAW_POINTER(target);
+  records = RAW_POINTER(database);
+  width = INTEGER_POINTER(noc)[0];
+  count = INTEGER_POINTER(n)[0];
+
+  PROTECT(res = NEW_INTEGER(count));
+  sums = INTEGER_POINTER(res);
+
+  if (width > 0) {
+    for (r = 0; r < count; r++) {
+      rec = records + (size_t) r * (size_t) width;
+      acc = 0;
+      for (k = 0; k < width; k++) {
+        acc += table[rec[k] & mask[k]];
+      }
+      sums[r] = acc;
+    }
+  }
+
+  UNPROTECT(6);
+  return(res);
+}
diff --git a/src/getzlibsock.c b/src/getzlibsock.c
new file mode 100644
index 0000000..2acc09b
--- /dev/null
+++ b/src/getzlibsock.c
@@ -0,0 +1,244 @@
+#include <R.h>
+#include <Rdefines.h>
+#include <Rinternals.h>
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif
+#endif
+#ifndef WIN32
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+
+void *prepare_sock_gz_r(int ncon ); /* defined elsewhere; getzlibsock() treats a NULL result as a failure */
+char *z_read_sock(void *v); /* defined elsewhere; getzlibsock() reads one string per call and takes NULL as "nothing available" */
+int close_sock_gz_r(void *v); /* defined elsewhere; its result is only printed in debug mode */
+static void *extract_opaque = NULL; /* handle returned by prepare_sock_gz_r() and handed to the two functions above */
+
+#define R_EOF -1
+#define MAXESSAIS 2 /* number of extra z_read_sock() attempts allowed for the first answer (was 1000000) */
+
+/* Build the one-element character vector used to report a status to R */
+static SEXP zlibsock_message(const char *msg)
+{
+  SEXP ans;
+
+  PROTECT(ans = allocVector(STRSXP, 1));
+  SET_STRING_ELT(ans, 0, mkChar(msg));
+  UNPROTECT(1);
+  return ans;
+}
+
+/*
+ * Read the answer to an extractseqs request from a compressed socket.
+ *
+ * sock:  R connection object; its integer value plus one is handed to
+ *        prepare_sock_gz_r()
+ * nmax:  initial number of lines to make room for; a negative value asks
+ *        for 1000. The result is enlarged whenever it is full, so this is
+ *        not an upper limit.
+ * debug: non-zero to trace the dialogue with Rprintf()
+ *
+ * Returns a character vector with one line per element (elements past the
+ * last line read are left empty), or a character vector of length one
+ * holding an error message when something went wrong.
+ */
+SEXP getzlibsock(SEXP sock, SEXP nmax, SEXP debug)
+{
+  /* only the R objects (ans and ans2) need protection */
+  SEXP ans = R_NilValue;
+  SEXP ans2 = R_NilValue;
+
+  int debugon;
+  int testc;
+  int numsoc;
+  int i, n, nn, nnn, nread;
+  int itest, itestd;
+  char *res;
+  int flagend = 0;
+  int nbseq = 0;
+
+  debugon = INTEGER_VALUE(debug);
+  n = INTEGER_VALUE(nmax);
+  if (debugon)
+    Rprintf("Running getzlibsock... \n");
+
+  if(!inherits(sock, "connection")) {
+    Rprintf("Error!\n\n'con' is not a connection");
+    return zlibsock_message("Socket is not defined.");
+  }
+  if (debugon)
+    Rprintf("'con' is a connection...\n");
+  numsoc = asInteger(sock);
+  numsoc ++;
+
+  if (debugon)
+    Rprintf("Socket number is %d....\n",numsoc);
+  extract_opaque=prepare_sock_gz_r(numsoc);
+  if (extract_opaque == NULL) {
+    Rprintf("Erreur dans prepare_sock_gz_r\n");
+    return zlibsock_message("Socket is not defined.");
+  }
+
+  if (debugon)
+    Rprintf("Trying to get answer from socket...\n");
+
+  res=z_read_sock(extract_opaque);
+
+  /* dirty patch (should be unnecessary): retry a few times while nothing
+     comes back; give up once MAXESSAIS extra attempts have been exceeded */
+  itest=0;
+  itestd=0;
+  while ( res == NULL){
+    res=z_read_sock(extract_opaque);
+    itest++;
+    itestd++;
+    if (debugon){
+      if (itestd>10) {
+        Rprintf("*");
+        itestd=0;
+      }
+    }
+
+    if (itest> MAXESSAIS) {
+      Rprintf("Socket error!\n");
+      Rprintf("No answer from socket after %d trials!\n",itest);
+      ans = zlibsock_message("No answer from socket.");
+      testc=close_sock_gz_r(extract_opaque);
+      if (debugon)
+        Rprintf("Closing socket close_sock_gz_r status = %d\n",testc);
+      return ans ;
+    }
+  }
+
+  if (debugon)
+    Rprintf("\n-->[%s]\n",res);
+  if (strncmp(res,"code=0",6) != 0) {
+    Rprintf("extractseqs error!\n");
+    Rprintf("[%s]\n",res);
+    ans = zlibsock_message("Wrong answer from socket.");
+    testc=close_sock_gz_r(extract_opaque);
+    if (debugon)
+      Rprintf("Closing socket close_sock_gz_r status = %d\n",testc);
+    return ans ;
+  }
+  if (debugon)
+    Rprintf("Socket answer is ok %s(%d)\n",res, (int) strlen(res));
+  nn = (n < 0) ? 1000 : n; /* initially allocate space for 1000 lines */
+  nnn = nn; /* the result is enlarged as soon as nread reaches this value */
+  PROTECT(ans = allocVector(STRSXP, nn));
+  nread=0;
+  if (debugon)
+    Rprintf("n=%d, nn=%d,nnn=%d\n",n,nn, nnn);
+  res=z_read_sock(extract_opaque);
+  while ((res != NULL) ) {
+
+    if (nread >=nnn) {
+      /* result full: move the lines to a vector twice as long (1000 when
+         the current one is empty) */
+      int newlen = (nn > 0) ? 2*nn : 1000;
+      if (debugon)
+        Rprintf("Increasing memory...\n");
+      PROTECT(ans2 = allocVector(STRSXP, newlen));
+      for(i = 0; i < nn; i++)
+        SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
+      nn = newlen;
+      nnn=nn;
+      UNPROTECT(2); /* old ans and ans2 */
+      PROTECT(ans = ans2);
+    }
+    if (strncmp(res,"extractseqs END.",16) == 0){
+      if (debugon)
+        Rprintf("extractseqs successfully ended ...\n");
+      flagend=1;
+      break;
+    }
+    if ((strncmp(res,"code=0",6) == 0) && (nread >0)) {
+      Rprintf("-->[%s]\n",res);
+      Rprintf("WARNING!\nextractseqs unsuccessfully ended ...\n");
+      flagend=1;
+      break;
+    }
+    if (strncmp(res,"\033count=", 7) == 0){
+      /* counter line: counted, not stored */
+      nbseq++;
+    } else {
+      SET_STRING_ELT(ans, nread, mkChar(res));
+      nread++;
+    }
+
+    res=z_read_sock(extract_opaque);
+  }
+  if (debugon)
+    Rprintf("Number of lines : %d\n",nread-1);
+  if (debugon)
+    Rprintf("Number of sequences : %d\n",nbseq);
+  if (flagend) {
+    if (debugon)
+      Rprintf("extractseqs OK, program carry on...\n");
+    if (debugon)
+      Rprintf("Ok, everything is fine!\n");
+  }
+  else{
+    /* the stream stopped before any end marker: report an error instead
+       of the lines read so far */
+    Rprintf("extractseqs error!\n");
+    UNPROTECT(1);
+    PROTECT(ans = zlibsock_message("Wrong answer from socket."));
+  }
+
+  testc=close_sock_gz_r(extract_opaque);
+  if (debugon)
+    Rprintf("Closing socket close_sock_gz_r status = %d\n",testc);
+  UNPROTECT(1);
+  return ans ;
+}
+#else
+SEXP getzlibsock(SEXP sock, SEXP nmax, SEXP debug)
+{
+  /*
+   * Windows build: compressed sockets are not supported, so sock and nmax
+   * are ignored. A warning is printed and the same text is returned as a
+   * character vector of length one.
+   */
+  SEXP notice;
+
+  if (INTEGER_VALUE(debug))
+    Rprintf("Running getzlibsock... \n");
+  Rprintf("Warning!\n\nCompressed sockets are not yet available on Windows.\n");
+
+  /* the vector only needs protection while mkChar() may allocate */
+  PROTECT(notice = allocVector(STRSXP, 1));
+  SET_STRING_ELT(notice, 0, mkChar("Compressed sockets are not yet available on Windows."));
+  UNPROTECT(1);
+
+  return notice;
+}
+#endif
diff --git a/src/kaks.c b/src/kaks.c
new file mode 100644
index 0000000..61c1cdc
--- /dev/null
+++ b/src/kaks.c
@@ -0,0 +1,1125 @@
+#include <R.h>
+#include <Rdefines.h>
+
+int code_mt = 0; /* Not implemented yet */
+
+void reresh(char **, int, int);
+void prefastlwl(double **, double **, double **, double **, double **, double **, double **, double **, double **, double **);
+int fastlwl(char **, int, int, double **, double **, double **, double **, double **, double **, double **, double **,
+ double **, double **, double **, double **, double **,double **, double **,double **,double **, double **,double **,double **, double **,double **);
+
+SEXP kaks(SEXP sequences, SEXP nbseq, SEXP debugkaks)
+{
+ char **seqIn; /* local working copy of sequences */
+ char **seq; /* pointer to original sequences from R object */
+ double *tl0[64], *tl1[64], *tl2[64], *tti0[64], *tti1[64], *tti2[64], *ttv0[64], *ttv1[64], *ttv2[64];
+ int i, j, totseqs, lgseq, n;
+ int debugon;
+ double *rl[21];
+ double **ka, **ks, **vka, **vks;
+ double **l0, **l2,**l4;
+ double **a0, **a2,**a4;
+ double **b0, **b2,**b4;
+
+ double *xl0,*xl2,*xl4;
+ double *xa0,*xa2,*xa4;
+ double *xb0,*xb2,*xb4;
+
+ double *xka, *xks, *xvka, *xvks;
+
+
+ double mat[19][19] = {{.382, .382, .343, .382, .382, .382, .382, .128, .040, .128, .040, .128, .040, .128, .040, .128, .343, .128, .040 },
+ { .382, .382, .128, .343, .343, .343, .343, .128, .040, .128, .040, .128, .040, .128, .040, .128, .128, .040, .040 },
+ { .343, .128, .343, .382, .382, .382, .343, .128, .040, .128, .128, .343, .128, .343, .128, .343, .343, .128, .040 },
+ { .382, .343, .382, .343, .343, .343, .343, .343, .040, .343, .343, .382, .343, .382, .343, .382, .382, .382, .343 },
+ { .382, .343, .382, .343, .382, .382, .382, .343, .040, .343, .128, .343, .128, .128, .128, .343, .343, .128, .040 },
+ { .382, .343, .382, .343, .382, .382, .382, .343, .040, .343, .128, .343, .128, .128, .040, .128, .128, .128, .040 },
+ { .382, .343, .343, .343, .382, .382, .382, .343, .040, .343, .128, .343, .128, .128, .128, .128, .343, .128, .040 },
+ { .128, .128, .128, .343, .343, .343, .343, .343, .040, .343, .128, .343, .128, .343, .128, .343, .343, .128, .040 },
+ { .040, .040, .040, .040, .040, .040, .040, .040, .040, .382, .382, .382, .343, .343, .343, .128, .128, .343, .128 },
+ { .128, .128, .128, .343, .343, .343, .343, .343, .382, .040, .040, .128, .128, .040, .128, .040, .040, .040, .040 },
+ { .040, .040, .128, .343, .128, .128, .128, .128, .382, .040, .343, .343, .343, .343, .128, .128, .128, .128, .128 },
+ { .128, .128, .343, .382, .343, .343, .343, .343, .382, .128, .343, .343, .343, .343, .343, .128, .128, .343, .343 },
+ { .040, .040, .128, .343, .128, .128, .128, .128, .343, .128, .343, .343, .343, .382, .343, .343, .343, .343, .343 },
+ { .128, .128, .343, .382, .128, .128, .128, .343, .343, .040, .343, .343, .382, .343, .382, .128, .128, .343, .343 },
+ { .040, .040, .128, .343, .128, .040, .128, .128, .343, .128, .128, .343, .343, .382, .382, .343, .382, .382, .343 },
+ { .128, .128, .343, .382, .343, .128, .128, .343, .128, .040, .128, .128, .343, .128, .343, .343, .343, .382, .382 },
+ { .343, .128, .343, .382, .343, .128, .343, .343, .128, .040, .128, .128, .343, .128, .382, .343, .382, .343, .128 },
+ { .128, .040, .128, .382, .128, .128, .128, .128, .343, .040, .128, .343, .343, .343, .382, .382, .343, .343, .343 },
+ {.040, .040, .040, .343, .040, .040, .040, .040, .128, .040, .128, .343, .343, .343, .343, .382, .128, .343, .382 }};
+
+ SEXP rka;
+ SEXP rks;
+ SEXP rvka;
+ SEXP rvks;
+ SEXP res;
+
+ /* -- addition fevrier 2012 --*/
+ SEXP rl0;
+ SEXP rl2;
+ SEXP rl4;
+
+ SEXP ra0;
+ SEXP ra2;
+ SEXP ra4;
+
+ SEXP rb0;
+ SEXP rb2;
+ SEXP rb4;
+
+
+
+ /* --------------------------- */
+
+/* SEXP lsequtil; The effective number of sites used, not used yet */
+
+ debugon = INTEGER_VALUE(debugkaks);
+ totseqs = INTEGER_VALUE(nbseq);
+
+ if(debugon) Rprintf("C> mode degug is on at C level with %d sequences\n", totseqs);
+
+/******************************************************************************/
+/* */
+/* Transient storage allocation with R_alloc: R will reclaim the memory at */
+/* the end of the call to kaks. R_alloc do its own error checking and will */
+/* raise an error if the memory cannot be allocated. */
+/* */
+/******************************************************************************/
+
+ seq = (char **) R_alloc(totseqs, sizeof(char *));
+ /*
+ Initialisation of seq so that seq[i] points to sequence number i:
+ */
+ for(i = 0 ; i < totseqs ; i++){
+ seq[i] = (char *) CHAR(STRING_ELT(sequences, i));
+ if(debugon) Rprintf("-->%s<--\n", seq[i]);
+ }
+ /* The length of the first sequence is used as a reference since in an
+ alignment all sequences are supposed to be of the same length, this point
+ is controlled before call to kaks at the R level.
+ */
+
+ lgseq = strlen(seq[0]);
+ if(debugon) Rprintf("C> lgseq = %d\n", lgseq);
+
+
+ seqIn = (char **) R_alloc(totseqs, sizeof(char *));
+
+ for(i = 0 ; i < totseqs ; i++){
+ seqIn[i]= (char *) R_alloc(lgseq + 1, sizeof(char));
+ }
+
+ for (i = 0 ; i < 64 ; i++) {
+ tl0[i] = (double *) R_alloc(64, sizeof(double));
+ tl1[i] = (double *) R_alloc(64, sizeof(double));
+ tl2[i] = (double *) R_alloc(64, sizeof(double));
+ tti0[i] = (double *) R_alloc(64, sizeof(double));
+ tti1[i] = (double *) R_alloc(64, sizeof(double));
+ tti2[i] = (double *) R_alloc(64, sizeof(double));
+ ttv0[i] = (double *) R_alloc(64, sizeof(double));
+ ttv1[i] = (double *) R_alloc(64, sizeof(double));
+ ttv2[i] = (double *) R_alloc(64, sizeof(double));
+ }
+
+ for (i = 0; i < 21 ; i++)
+ rl[i] = (double *) R_alloc(21, sizeof(double));
+
+ ka = (double **) R_alloc(totseqs, sizeof(double *));
+ ks = (double **) R_alloc(totseqs, sizeof(double *));
+ vka = (double **) R_alloc(totseqs, sizeof(double *));
+ vks = (double **) R_alloc(totseqs, sizeof(double *));
+
+ l0 = (double **) R_alloc(totseqs, sizeof(double *));
+ l2 = (double **) R_alloc(totseqs, sizeof(double *));
+ l4 = (double **) R_alloc(totseqs, sizeof(double *));
+
+ a0 = (double **) R_alloc(totseqs, sizeof(double *));
+ a2 = (double **) R_alloc(totseqs, sizeof(double *));
+ a4 = (double **) R_alloc(totseqs, sizeof(double *));
+
+
+ b0 = (double **) R_alloc(totseqs, sizeof(double *));
+ b2 = (double **) R_alloc(totseqs, sizeof(double *));
+ b4 = (double **) R_alloc(totseqs, sizeof(double *));
+
+
+ for (i = 0; i < totseqs; i++) {
+ ka[i] = (double *) R_alloc(totseqs, sizeof(double));
+ vka[i] = (double *) R_alloc(totseqs, sizeof(double));
+ ks[i] = (double *) R_alloc(totseqs, sizeof(double));
+ vks[i] = (double *) R_alloc(totseqs, sizeof(double));
+ l0[i] = (double *) R_alloc(totseqs, sizeof(double));
+ l2[i] = (double *) R_alloc(totseqs, sizeof(double));
+ l4[i] = (double *) R_alloc(totseqs, sizeof(double));
+ a0[i] = (double *) R_alloc(totseqs, sizeof(double));
+ a2[i] = (double *) R_alloc(totseqs, sizeof(double));
+ a4[i] = (double *) R_alloc(totseqs, sizeof(double));
+ b0[i] = (double *) R_alloc(totseqs, sizeof(double));
+ b2[i] = (double *) R_alloc(totseqs, sizeof(double));
+ b4[i] = (double *) R_alloc(totseqs, sizeof(double));
+
+
+ }
+
+/******************************************************************************/
+/* */
+/* Make a local copy of sequence into char **seqIn because the sequences are */
+/* modified by the program before computations (gap removal) */
+/* */
+/******************************************************************************/
+
+ for(i = 0 ; i < totseqs ; i++){
+ for(j = 0 ; j < lgseq ; j++){
+ seqIn[i][j] = seq[i][j];
+ }
+ seqIn[i][lgseq] = '\0';
+ }
+
+/******************************************************************************/
+/* */
+/* Creation of R objects in the C code */
+/* */
+/******************************************************************************/
+
+
+ PROTECT(res = NEW_LIST(14));
+ PROTECT(rka = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rks = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rvka = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rvks = NEW_NUMERIC(totseqs*totseqs));
+
+ /* -- addition fevrier 2012 --*/
+
+ PROTECT(rl0 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rl2 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rl4 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(ra0 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(ra2 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(ra4 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rb0 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rb2 = NEW_NUMERIC(totseqs*totseqs));
+ PROTECT(rb4 = NEW_NUMERIC(totseqs*totseqs));
+
+
+
+
+/* PROTECT(rl024 = NEW_NUMERIC(3));
+ PROTECT(ra024 = NEW_NUMERIC(3));
+ PROTECT(rb024 = NEW_NUMERIC(3));*/
+ /* --------------------------- */
+
+ for (i = 2; i < 21; i++) {
+ for (j = 1; j < i; j++) {
+ *(rl[i] + j) = mat[j-1][i-2] ;
+ }
+ }
+
+ for (i = 1; i <= 20; i++) {
+ *(rl[i] + i) = 1.0;
+ for (j = i + 1; j <= 20; j++)
+ *(rl[i] + j) = *(rl[j] + i);
+ }
+
+
+/******************************************************************************/
+/* */
+/* Replace codons with non ACGT bases with --- */
+/* */
+/******************************************************************************/
+
+ for (i = 0 ; i < totseqs ; i++){
+ for(j = 0 ; j < lgseq ; j++){
+ if ((*(seqIn[i] + j) != 'A') && (*(seqIn[i] + j) != 'G') && (*(seqIn[i] + j) != 'C') && (*(seqIn[i] + j) != 'T') ) {
+ /* Base in first codon position */
+ if (j % 3 == 0) {
+ *(seqIn[i] + j) = '-';
+ *(seqIn[i] + j + 1) = '-';
+ *(seqIn[i] + j + 2) = '-';
+ }
+ /* Base in second codon position */
+ if (j % 3 == 1) {
+ *(seqIn[i] + j) = '-';
+ *(seqIn[i] + j + 1) = '-';
+ *(seqIn[i] + j - 1) = '-';
+ }
+ /* Base in third codon position */
+ if (j % 3 == 2) {
+ *(seqIn[i] + j) = '-';
+ *(seqIn[i] + j - 1) = '-';
+ *(seqIn[i] + j - 2) = '-';
+ }
+ }
+ }
+ }
+
+/******************************************************************************/
+/* */
+/* Remove positions with gaps */
+/* */
+/******************************************************************************/
+
+ reresh(seqIn,totseqs,0);
+
+ for(i = 0 ; i < totseqs ; i++){
+ if(debugon) Rprintf("reresh-->%s<--\n", seqIn[i]);
+ }
+ for(i = 0 ; i < totseqs ; i++){
+ for(j = 0 ; j < totseqs ; j++){
+ ka[i][j] = -1;
+ ks[i][j] = -1;
+ vka[i][j] = -1;
+ vks[i][j] = -1;
+ l0[i][j] = 0;
+ l2[i][j] = 0;
+ l4[i][j] = 0;
+ a0[i][j] = 0;
+ a2[i][j] = 0;
+ a4[i][j] = 0;
+ b0[i][j] = 0;
+ b2[i][j] = 0;
+ b4[i][j] = 0;
+
+
+
+ }
+ }
+
+
+ prefastlwl(rl, tl0, tl1, tl2, tti0, tti1, tti2, ttv0, ttv1, ttv2);
+
+/*
+ Dump memoire commenterise
+ if(debugon){
+ FILE *out;
+ out = fopen("dumpkaks", "w");
+ for(i = 0 ; i < 21 ; i++){
+ for(j = 0 ; j < 21 ; j++){
+ fprintf(out, "%lf\n", rl[i][j]);
+ }
+ }
+ for(i = 0 ; i < 64 ; i++){
+ for(j = 0 ; j < 64 ; j++){
+ fprintf(out, "%lf\n", tl0[i][j]);
+ fprintf(out, "%lf\n", tl1[i][j]);
+ fprintf(out, "%lf\n", tl2[i][j]);
+ fprintf(out, "%lf\n", tti0[i][j]);
+ fprintf(out, "%lf\n", tti1[i][j]);
+ fprintf(out, "%lf\n", tti2[i][j]);
+ fprintf(out, "%lf\n", ttv0[i][j]);
+ fprintf(out, "%lf\n", ttv1[i][j]);
+ fprintf(out, "%lf\n", ttv2[i][j]);
+ }
+ }
+ fclose(out);
+ }
+*/
+
+ lgseq = strlen(seqIn[0]);
+ /* l024 = NUMERIC_POINTER(rl024);
+ a024 = NUMERIC_POINTER(ra024);
+ b024 = NUMERIC_POINTER(rb024);*/
+ fastlwl(seqIn, totseqs, lgseq, ka, ks, tti0, tti1, tti2, ttv0, ttv1, ttv2, tl0, tl1, tl2, vka, vks,l0,l2,l4,a0,a2,a4,b0,b2,b4);
+
+ for(i = 0 ; i < totseqs ; i++){
+ if(debugon) Rprintf(" -->%s<--\n", seqIn[i]);
+ }
+
+/******************************************************************************/
+/* */
+/* In this section we copy the results from ka, ks, vka and vks into the R */
+/* objects rka, rks, rvka and rvka, respectively. */
+/* */
+/******************************************************************************/
+
+ n = 0;
+ xka = NUMERIC_POINTER(rka);
+ xks = NUMERIC_POINTER(rks);
+ xvka = NUMERIC_POINTER(rvka);
+ xvks = NUMERIC_POINTER(rvks);
+ xl0 = NUMERIC_POINTER(rl0);
+ xl2 = NUMERIC_POINTER(rl2);
+ xl4 = NUMERIC_POINTER(rl4);
+ xa0 = NUMERIC_POINTER(ra0);
+ xa2 = NUMERIC_POINTER(ra2);
+ xa4 = NUMERIC_POINTER(ra4);
+ xb0 = NUMERIC_POINTER(rb0);
+ xb2 = NUMERIC_POINTER(rb2);
+ xb4 = NUMERIC_POINTER(rb4);
+
+
+
+ for(i = 0 ; i < totseqs ; i++){
+ for(j = 0 ; j < totseqs ; j++){
+ xka[n] = ka[i][j];
+ xks[n] = ks[i][j];
+ xvka[n] = vka[i][j];
+ xvks[n] = vks[i][j];
+ xl0[n] = l0[i][j];
+ xl2[n] = l2[i][j];
+ xl4[n] = l4[i][j];
+ xa0[n] = a0[i][j];
+ xa2[n] = a2[i][j];
+ xa4[n] = a4[i][j];
+ xb0[n] = b0[i][j];
+ xb2[n] = b2[i][j];
+ xb4[n] = b4[i][j];
+ if(debugon) Rprintf("C> i = %d, j = %d, n = %d, ka = %lf, ks = %lf, vka = %lf, vks = %lf, l0 = %lf, l2 = %lf, l4 = %lf, a0 = %lf, a2 = %lf, a4 = %lf, b0 = %lf, b2 = %lf, b4 = %lf\n", i, j, n, ka[i][j], ks[i][j], vka[i][j], vks[i][j],l0[i][j],l2[i][j],l4[i][j],a0[i][j],a2[i][j],a4[i][j],b0[i][j],b2[i][j],b4[i][j] );
+ n++;
+ }
+ }
+ SET_ELEMENT(res, 0, rka);
+ SET_ELEMENT(res, 1, rks);
+ SET_ELEMENT(res, 2, rvka);
+ SET_ELEMENT(res, 3, rvks);
+ SET_ELEMENT(res, 4, rl0);
+ SET_ELEMENT(res, 5, rl2);
+ SET_ELEMENT(res, 6, rl4);
+ SET_ELEMENT(res, 7, ra0);
+ SET_ELEMENT(res, 8, ra2);
+ SET_ELEMENT(res, 9, ra4);
+ SET_ELEMENT(res, 10, rb0);
+ SET_ELEMENT(res, 11, rb2);
+ SET_ELEMENT(res, 12, rb4);
+ UNPROTECT(14);
+
+ if(debugon) Rprintf("C> %s", "End of C level....................\n");
+ return(res);
+}
+
+/* num(): encode the codon cod[0..2] as 16*n1 + 4*n2 + n3 (range 0..63) with A=0, C=1, G=2, T=3; any other character counts as 0, like 'A'. */
+int num(char *cod)
+{
+ int n1, n2, n3;
+
+ n1 = n2 = n3 = 0;
+ if (cod[0] == 'C')
+ n1 = 1;
+ if (cod[1] == 'C')
+ n2 = 1;
+ if (cod[2] == 'C')
+ n3 = 1;
+ if (cod[0] == 'G')
+ n1 = 2;
+ if (cod[1] == 'G')
+ n2 = 2;
+ if (cod[2] == 'G')
+ n3 = 2;
+ if (cod[0] == 'T')
+ n1 = 3;
+ if (cod[1] == 'T')
+ n2 = 3;
+ if (cod[2] == 'T')
+ n3 = 3;
+
+ return 16 * n1 + 4 * n2 + n3;
+}
+
+/* fastlwl(): for every pair i < j of the nbseq sequences (lgseq nucleotides each), sums the per-codon-pair table values and fills ka, ks, vka, vks, l0/l2/l4, a0/a2/a4, b0/b2/b4 at [i][j]; only entries with i < j are written. */
+int fastlwl(char **seq, int nbseq, int lgseq, double **ka, double **ks,
+ double **tti0, double **tti1, double **tti2, double **ttv0,
+ double **ttv1, double **ttv2, double **tl0, double **tl1,
+ double **tl2, double **vka, double **vks, double **l0, double **l2,double **l4, double **a0, double **a2,double **a4, double **b0, double **b2,double **b4)
+{
+ const double trois = 3.0;
+ double l[3], a[3], b[3], p[3], q[3], ti[3], tv[3], cc[3],
+ aaa[3], bb[3], flgseq, va[3], vb[3];
+ char cod1[3], cod2[3];
+ int i, j, ii, num1, num2, sat, sat1, sat2;
+ sat = sat1 = sat2 = 2;
+ /*
+ Internal check at C level: this should be no more be necessary, I'll keep it just in case. JRL - 26-APR-2009
+ */
+ flgseq = (double) lgseq;
+ if (flgseq / trois != lgseq / 3) {
+ REprintf("Fatal error: the number of nucleotide after gap removal is not a multiple of 3.\nPlease report this bug on the seqinr diffusion list.\n");
+ return(0); /* Should be R's NA but an int is returned by fastlwl */
+ }
+ for (i = 0; i < nbseq - 1; i++) {
+ for (j = i + 1; j < nbseq; j++) {
+ l[0] = l[1] = l[2] = 0;
+ ti[0] = ti[1] = ti[2] = tv[0] = tv[1] = tv[2] = 0;
+ for (ii = 0; ii < lgseq / 3; ii++) { /* one iteration per codon; tables are indexed by the num() codes of the two codons */
+ cod1[0] = *(seq[i] + 3 * ii);
+ cod1[1] = *(seq[i] + 3 * ii + 1);
+ cod1[2] = *(seq[i] + 3 * ii + 2);
+ cod2[0] = *(seq[j] + 3 * ii);
+ cod2[1] = *(seq[j] + 3 * ii + 1);
+ cod2[2] = *(seq[j] + 3 * ii + 2);
+ num1 = num(cod1);
+ num2 = num(cod2);
+ l[0] += *(tl0[num1] + num2);
+ l[1] += *(tl1[num1] + num2);
+ l[2] += *(tl2[num1] + num2);
+ ti[0] += *(tti0[num1] + num2);
+ ti[1] += *(tti1[num1] + num2);
+ ti[2] += *(tti2[num1] + num2);
+ tv[0] += *(ttv0[num1] + num2);
+ tv[1] += *(ttv1[num1] + num2);
+ tv[2] += *(ttv2[num1] + num2);
+ }
+ l0[i][j]=l[0];
+ l2[i][j]=l[1];
+ l4[i][j]=l[2];
+ for (ii = 0; ii < 3; ii++) { /* ii = 0, 1, 2: the three site classes whose totals are l[0], l[1], l[2] */
+ p[ii] = ti[ii] / l[ii];
+ q[ii] = tv[ii] / l[ii];
+ aaa[ii] = 1 / (1 - 2 * p[ii] - q[ii]);
+ bb[ii] = 1 / (1 - 2 * q[ii]);
+ cc[ii] = (aaa[ii] + bb[ii]) / 2;
+ if (bb[ii] <= 0) {
+ b[ii] = 10;
+ } else {
+ b[ii] = 0.5 * (double) log(bb[ii]);
+ }
+ if ((aaa[ii] <= 0) || (bb[ii] <= 0)) {
+ a[ii] = 10;
+ } else {
+ a[ii] = 0.5 * (double) log(aaa[ii]) - 0.25 * log(bb[ii]);
+ }
+ va[ii] = (aaa[ii] * aaa[ii] * p[ii] + cc[ii] * cc[ii] * q[ii] - (aaa[ii] * p[ii] + cc[ii] * q[ii]) * ( aaa[ii] * p[ii] + cc[ii] * q[ii])) / l[ii];
+ vb[ii] = bb[ii] * bb[ii] * q[ii] * (1 - q[ii]) / l[ii];
+ }
+ /* a[] or b[] equal to 10 is the sentinel set above when a log argument was <= 0; such pairs get 9.999999 */
+ if ((a[1] != 10) && (a[2] != 10) && (b[2] != 10)){
+ ks[i][j] = (l[1] * a[1] + l[2] * a[2]) / (l[2] + l[1]) + b[2];
+ vks[i][j] = (l[1] * l[1] * va[1] + l[2] * l[2] * va[2]) / ((l[1] + l[2]) * (l[1]+l[2])) + vb[2] - bb[2] * q[2] * (2 * aaa[2] * p[2] - cc[2] * (1 - q[2]))/(l[1]+l[2]);
+ } else {
+ sat1 = 1;
+ vks[i][j]=ks[i][j] = 9.999999;
+ }
+ if ((a[0] != 10) && (b[0] != 10) && (b[1] != 10)){
+ ka[i][j] = a[0] + (l[0] * b[0] + l[1] * b[1]) / (l[0] + l[1]);
+ vka[i][j] = (l[0] * l[0] * vb[0] + l[1] * l[1] * vb[1]) / ((l[1] + l[0]) * (l[1]+l[0])) + va[0] - bb[0] * q[0] * (2 * aaa[0] * p[0] - cc[0] * (1 - q[0]))/(l[1]+l[0]);
+ } else {
+ vka[i][j]=ka[i][j] = 9.999999;
+ sat2 = 1;
+ }
+ a0[i][j]=a[0];
+ a2[i][j]=a[1];
+ a4[i][j]=a[2];
+ b0[i][j]=b[0];
+ b2[i][j]=b[1];
+ b4[i][j]=b[2];
+
+ }
+ }
+
+ /* -- added February 2012 --*/
+ /* L0, L2, L4: # of non-synonymous sites, of 2-fold synonymous sites, of 4-fold synonymous sites
+
+ A0, A2, A4: # of transitional changes at non-synonymous, 2-fold, and 4-fold synonymous sites
+
+ B0, B2, B4: # of transversional changes at non-synonymous, 2-fold, and 4-fold synonymous sites
+
+ These quantities map to the following variables in fastlwl():
+ L0, L2, L4 correspond to l[0], l[1], l[2]
+ A0, A2, A4 correspond to a[0], a[1], a[2]
+ B0, B2, B4 correspond to b[0], b[1], b[2]
+ */
+
+ /* Return value: 2 if no pair hit the sentinel; 1 if some ks did; 0 if some ka did (tested last, so it wins). */
+ /* NOTE(review): 0 is also what the length check at the top returns, so callers cannot tell the two apart. */
+
+
+ if (sat1 == 1)
+ sat = 1;
+ if (sat2 == 1)
+ sat = 0;
+ return sat;
+}
+
+
+int catsite(char c1, char c2, char c3, int i) {
+ /* i is the 1-based codon position; i == 2 (or any value other than 1 and 3) always yields 0. Reads code_mt, which is defined outside this function. */
+ /* returns 0 if site i of codon c1c2c3 is non-degenerate */
+ /* 1 2-fold degenerate */
+ /* 2 4-fold degenerate */
+
+ if (i == 3) {
+ if( !code_mt ) {
+ if ( (c1 == 'A') && (c2 == 'T') && (c3 == 'G'))
+ return 0;
+ if ( (c1 == 'T') && (c2 == 'G') && (c3 == 'A'))
+ return 0;
+ if ( (c1 == 'T') && (c2 == 'G') && (c3 == 'G'))
+ return 0;
+ }
+ if (c2 == 'C')
+ return 2;
+ if ((c1 == 'C') && (c2 == 'T'))
+ return 2;
+ if ((c1 == 'G') && (c2 == 'T'))
+ return 2;
+ if ((c1 == 'G') && (c2 == 'G'))
+ return 2;
+ if ((c1 == 'C') && (c2 == 'G'))
+ return 2;
+ return 1;
+ }
+ else if (i == 1) {
+ if ((c1 == 'C') && (c2 == 'T') && (c3 == 'A'))
+ return 1;
+ if ((c1 == 'C') && (c2 == 'T') && (c3 == 'G'))
+ return 1;
+ if ((c1 == 'T') && (c2 == 'T') && (c3 == 'A'))
+ return 1;
+ if ((c1 == 'T') && (c2 == 'T') && (c3 == 'G'))
+ return 1;
+ if( !code_mt ) {
+ if ((c1 == 'A') && (c2 == 'G') && (c3 == 'A'))
+ return 1;
+ if ((c1 == 'A') && (c2 == 'G') && (c3 == 'G'))
+ return 1;
+ if ((c1 == 'C') && (c2 == 'G') && (c3 == 'A'))
+ return 1;
+ if ((c1 == 'C') && (c2 == 'G') && (c3 == 'G'))
+ return 1;
+ }
+ return 0;
+ }
+ return 0;
+}
+
+/* transf(): classify the substitution nt1 -> nt2: 'i' = transition (A<->G, C<->T), 'v' = transversion, 'S' (plus a message) if both are equal, 'E' (plus an error message) otherwise, i.e. a character other than A, C, G, T. */
+char transf(char nt1, char nt2)
+{
+ if (nt1 == nt2) {
+ Rprintf("Same nt, patate.\n");
+ return 'S';
+ }
+ if ((nt1 == 'A') && (nt2 == 'C'))
+ return 'v';
+ if ((nt1 == 'A') && (nt2 == 'G'))
+ return 'i';
+ if ((nt1 == 'A') && (nt2 == 'T'))
+ return 'v';
+ if ((nt1 == 'G') && (nt2 == 'C'))
+ return 'v';
+ if ((nt1 == 'G') && (nt2 == 'T'))
+ return 'v';
+ if ((nt1 == 'C') && (nt2 == 'T'))
+ return 'i';
+ if ((nt1 == 'C') && (nt2 == 'A'))
+ return 'v';
+ if ((nt1 == 'G') && (nt2 == 'A'))
+ return 'i';
+ if ((nt1 == 'T') && (nt2 == 'A'))
+ return 'v';
+ if ((nt1 == 'C') && (nt2 == 'G'))
+ return 'v';
+ if ((nt1 == 'T') && (nt2 == 'G'))
+ return 'v';
+ if ((nt1 == 'T') && (nt2 == 'C'))
+ return 'i';
+
+ REprintf("Error\n%c, %c\n", nt1, nt2);
+ return 'E';
+}
+
+/* titv1(): for each position where cod1 and cod2 differ, adds 0.5*poids to l[] and to ti[] (transition) or tv[] (anything else) in the catsite() class of that site in each codon; then, unless code_mt is set, moves weight between ti[1] and tv[1] for the special codon pairs listed below. */
+void titv1(char *cod1, char *cod2, double poids, double *ti, double *tv, double* l)
+{
+ int i;
+ char a, b, ci1, ci2, ci3, cj1, cj2, cj3;
+ char transf(char, char);
+
+ ci1 = cod1[0];
+ ci2 = cod1[1];
+ ci3 = cod1[2];
+ cj1 = cod2[0];
+ cj2 = cod2[1];
+ cj3 = cod2[2];
+
+
+
+
+
+ for (i = 0; i <= 2; i++)
+ if (cod1[i] != cod2[i]) {
+ /* half of the weight goes to the site class seen in each of the two codons */
+ l[catsite(ci1, ci2, ci3, i + 1)]+=0.5 * poids;
+ l[catsite(cj1, cj2, cj3, i + 1)]+=0.5 * poids;
+
+ a = cod1[i];
+ b = cod2[i];
+ if (transf(a, b) == 'i') {
+ ti[catsite(ci1, ci2, ci3, i + 1)] += 0.5 * poids;
+ ti[catsite(cj1, cj2, cj3, i + 1)] += 0.5 * poids;
+ } else {
+ tv[catsite(ci1, ci2, ci3, i + 1)] += 0.5 * poids;
+ tv[catsite(cj1, cj2, cj3, i + 1)] += 0.5 * poids;
+ }
+
+ if( code_mt ) continue; /* with code_mt the non-synonymous TI / synonymous TV problems handled below no longer arise */
+
+ if (((ci2 == 'T') && (cj2 == 'T')) || ((ci2 == 'G') && (cj2 == 'G'))) { /* both codons have T, or both have G, at position 2 */
+
+
+if (i==0){ /* pos 1 */
+ /* all these cases are non-synonymous transitions at a 2-fold site under the universal code:
+they must be removed from the 2-fold TI count (ti[1]) and added to the 2-fold TV count (tv[1]);
+under code_mt they are non-degenerate sites that have already been handled correctly */
+ if ((ci1 == 'C') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'T') && (cj2 == 'G') && (cj3 == 'A')) {
+ ti[1] -= 0.5 * poids; /* CGA / TGA */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'C') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'T') && (cj2 == 'G') && (cj3 == 'G')) {
+ ti[1] -= 0.5 * poids; /* CGG / TGG */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'G') && (cj2 == 'G') && (cj3 == 'G')) {
+ ti[1] -= 0.5 * poids; /* AGG / GGG */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'G') && (cj2 == 'G') && (cj3 == 'A')) {
+ ti[1] -= 0.5 * poids; /* AGA / GGA */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'T') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'C') && (cj2 == 'G') && (cj3 == 'A')) {
+ ti[1] -= 0.5 * poids; /* TGA / CGA */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'T') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'C') && (cj2 == 'G') && (cj3 == 'G')) {
+ ti[1] -= 0.5 * poids; /* TGG / CGG */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'G') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'A') && (cj2 == 'G') && (cj3 == 'G')) {
+ ti[1] -= 0.5 * poids; /* GGG / AGG */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'G') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'A') && (cj2 == 'G') && (cj3 == 'A')) {
+ ti[1] -= 0.5 * poids; /* GGA / AGA */
+ tv[1] += 0.5 * poids;
+ }
+
+
+/* all these cases are
+universal code: synonymous TV at 2-fold sites; they must be removed from the 2-fold TV count (tv[1]) and added to the 2-fold TI count (ti[1])
+code_mt: non-synonymous TV at non-degenerate sites, which have been counted correctly
+*/
+ if ((ci1 == 'C') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'A') && (cj2 == 'G') && (cj3 == 'A')) {
+ tv[1] -= poids; /* CGA / AGA : TV synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'G') && (ci3 == 'A') && (cj1 == 'C') && (cj2 == 'G') && (cj3 == 'A')) {
+ tv[1] -= poids; /* AGA / CGA : TV synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'C') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'A') && (cj2 == 'G') && (cj3 == 'G')) {
+ tv[1] -= poids; /* CGG / AGG : TV synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'G') && (ci3 == 'G') && (cj1 == 'C') && (cj2 == 'G') && (cj3 == 'G')) {
+ tv[1] -= poids; /* AGG / CGG : TV synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+}
+
+if (i==2){ /* pos 3 */
+/* all these cases are
+universal code: synonymous TV at a 2-fold site; they must be removed from the 2-fold TV (tv[1]) and added to the 2-fold TI (ti[1])
+code_mt: non-synonymous TV at a 2-fold site, which have been counted normally
+*/
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'A') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'T')) {
+ tv[1] -= poids; /* TV ATA / ATT : synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'T') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'A')) {
+ tv[1] -= poids; /* TV ATT / ATA : synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'A') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'C')) {
+ tv[1] -= poids; /* TV ATA / ATC : synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'C') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'A')) {
+ tv[1] -= poids; /* TV ATC / ATA : synonymous under the universal code, not under code_mt */
+ ti[1] += poids;
+ }
+
+
+
+/* these 2 cases are
+universal code: non-synonymous TI at a 2-fold site; they must be removed from the 2-fold TI (ti[1]) and added to the 2-fold TV (tv[1])
+code_mt: synonymous TI at a 2-fold site, which have been counted normally
+*/
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'A') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'G')) {
+ ti[1] -= 0.5 * poids; /* TI ATA / ATG : non-synonymous under the universal code, synonymous under code_mt */
+ tv[1] += 0.5 * poids;
+ }
+ if ((ci1 == 'A') && (ci2 == 'T') && (ci3 == 'G') && (cj1 == 'A') && (cj2 == 'T') && (cj3 == 'A')) {
+ ti[1] -= 0.5 * poids; /* TI ATG / ATA : non-synonymous under the universal code, synonymous under code_mt */
+ tv[1] += 0.5 * poids;
+ }
+}
+ }
+
+ }
+}
+
+/* titv2(): handles a codon pair through two intermediate codons (codint1, codint2), each one substitution away from cod1; updates l[], ti[], tv[] with the two paths weighted by p1 and p2 derived from rl[][] and the codon-to-amino-acid table aa[]. */
+void titv2(char *cod1, char *cod2, double *ti, double *tv, double* l, int *aa, double **rl, int* pos)
+{
+
+ char codint1[3], codint2[3];
+ int i, j, n = 0;
+ double l1, l2, p1, p2;
+ void titv1(char *, char *, double, double *, double *,double*);
+
+ /* codint1 = cod1 with its first differing position (searched among 0 and 1) taken from cod2; codint2 = cod1 with the next differing position taken from cod2 */
+ memcpy(codint1, cod1, 3);
+ memcpy(codint2, cod1, 3);
+ for (i = 0; i < 2; i++) {
+ if (cod1[i] != cod2[i])
+ codint1[i] = cod2[i];
+ if (cod1[i] != cod2[i])
+ break;
+ }
+ for (j = i + 1; j <= 2; j++) {
+ if (cod1[j] != cod2[j])
+ codint2[j] = cod2[j];
+ if (cod1[j] != cod2[j])
+ break;
+ }
+
+ /* l1, l2: products of the rl[][] entries along cod1 -> codint1 -> cod2 and cod1 -> codint2 -> cod2; p1 and p2 = 1 - p1 are the resulting path weights */
+ l1 = *(rl[aa[num(cod1)]] + aa[num(codint1)]) * *(rl[aa[num(codint1)]] + aa[num(cod2)]);
+ l2 = *(rl[aa[num(cod1)]] + aa[num(codint2)]) * *(rl[aa[num(codint2)]] + aa[num(cod2)]);
+
+ p1 = l1 / (l1 + l2);
+ p2 = 1 - p1;
+ for (i=0;i<3;i++) if (pos[i]==0) n=i+1; /* n = 1-based index of the last position whose pos[] entry is 0 (stays 0 if there is none) */
+ l[catsite(cod1[0], cod1[1] ,cod1[2], n)]+=0.333333;
+ l[catsite(cod2[0], cod2[1] ,cod2[2], n)]+=0.333333;
+ l[catsite(codint1[0], codint1[1] ,codint1[2], n)]+=0.333333*p1;
+ l[catsite(codint2[0], codint2[1] ,codint2[2], n)]+=0.333333*p2;
+ titv1(cod1, codint1, p1, ti, tv,l);
+ titv1(cod2, codint1, p1, ti, tv,l);
+ titv1(cod1, codint2, p2, ti, tv,l);
+ titv1(cod2, codint2, p2, ti, tv,l);
+
+
+}
+/* titv3(): builds six two-intermediate paths cod1 -> codint1 -> codint2 -> cod2 (position i changed first, then j, then the remaining one), weights each by the product of three rl[][] entries, and accumulates the three single steps of every path through titv1(). */
+void titv3(char *cod1, char *cod2, double *ti, double *tv, double* l, int *aa, double **rl)
+{
+
+ char *codint1[6], *codint2[6];
+ int i, j, ii,a,b,c,d,aaa,aab,aac,aad;
+ double like[6], p[6], somli, rlab, rlbc, rlcd;
+ void titv1(char *, char *, double, double *, double *, double*);
+ int num(char *);
+ /* six 3-byte scratch codons per array, one pair per path, obtained from R_alloc() */
+ for (i = 0; i < 6; i++) {
+ codint1[i] = (char *) R_alloc(3, sizeof(char));
+ codint2[i] = (char *) R_alloc(3, sizeof(char));
+ }
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 3 ; j++)
+ if (j != i) { /* ii = 0..5 numbers the ordered pairs (i, j) with i != j */
+ if ((i == 0) || ((i == 1) && (j == 0))) {
+ ii = 3 * i + j - 1;
+ } else {
+ ii = 3 * i + j - 2;
+ }
+ memcpy(codint1[ii], cod1, 3);
+ *(codint1[ii] + i) = cod2[i];
+ memcpy(codint2[ii], codint1[ii], 3);
+ *(codint2[ii] + j) = cod2[j];
+ a=num(cod1);
+ b=num(codint1[ii]);
+ c=num(codint2[ii]);
+ d=num(cod2);
+ aaa=aa[a];
+ aab=aa[b];
+ aac=aa[c];
+ aad=aa[d];
+ rlab=*(rl[aaa]+aab);
+ rlbc=*(rl[aab]+aac);
+ rlcd=*(rl[aac]+aad);
+ like[ii] = rlab*rlbc*rlcd;
+ }
+ }
+ /* weight each path by its share of the summed products, then accumulate its three single-nucleotide steps */
+ somli = 0;
+ for (i = 0; i < 6; i++)
+ somli += like[i];
+ for (i = 0; i < 6; i++) {
+ p[i] = like[i] / somli;
+ titv1(cod1, codint1[i], p[i], ti, tv,l);
+ titv1(codint1[i], codint2[i], p[i], ti, tv,l);
+ titv1(codint2[i], cod2, p[i], ti, tv,l);
+ }
+
+
+}
+
+
+/* prefastlwl(): fills the nine 64x64 tables tl0..2, tti0..2, ttv0..2 (indexed by num() codon codes) with the l[], ti[], tv[] values computed for every codon pair; also overwrites row 0 and column 0 of rl with the minimum found in rl. */
+void prefastlwl(double **rl, double **tl0, double **tl1, double **tl2, double **tti0, double **tti1, double **tti2, double **ttv0, double **ttv1, double **ttv2)
+{
+
+ double l[3], ti[3], tv[3];
+ char cod1[3], cod2[3];
+ int i, j, ii, jj, nbdiff, pos[3], aa[64], n1, n2, n3;
+ void titv2(char *, char *, double *, double *, double *, int *, double **, int *pos);
+ void titv3(char *, char *, double *, double *, double *, int *, double **);
+ void titv1(char *, char *, double, double *, double *, double *);
+ double minrl;
+
+/* amino-acid codes:
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0
+F W Y H M L I V P C A G T S Q N K R E D stop
+*/
+
+ aa[0] = 17;/* aaa K */
+ aa[1] = 16;/* aac N */
+ aa[2] = 17;/* aag K */
+ aa[3] = 16;/* aat N */
+ aa[4] = 13;/* aca T */
+ aa[5] = 13;/* acc T */
+ aa[6] = 13;/* acg T */
+ aa[7] = 13;/* act T */
+if(code_mt)
+ aa[8] = 0;/* aga * */
+else
+ aa[8] = 18;/* aga R */
+ aa[9] = 14;/* agc S */
+if(code_mt)
+ aa[10] = 0;/* agg * */
+else
+ aa[10] = 18;/* agg R */
+ aa[11] = 14;/* agt S */
+if(code_mt)
+ aa[12] = 5;/* ata M */
+else
+ aa[12] = 7;/* ata I */
+ aa[13] = 7;/* atc I */
+ aa[14] = 5;/* atg M */
+ aa[15] = 7;/* att I */
+ aa[16] = 15;/* caa Q */
+ aa[17] = 4;/* cac H */
+ aa[18] = 15;/* cag Q */
+ aa[19] = 4;/* cat H */
+ aa[20] = 9;/* cca P */
+ aa[21] = 9;/* ccc P */
+ aa[22] = 9;/* ccg P */
+ aa[23] = 9;/* cct P */
+ aa[24] = 18;/* cga R */
+ aa[25] = 18;/* cgc R */
+ aa[26] = 18;/* cgg R */
+ aa[27] = 18;/* cgt R */
+ aa[28] = 6;/* cta L */
+ aa[29] = 6;/* ctc L */
+ aa[30] = 6;/* ctg L */
+ aa[31] = 6;/* ctt L */
+ aa[32] = 19;/* gaa E */
+ aa[33] = 20;/* gac D */
+ aa[34] = 19;/* gag E */
+ aa[35] = 20;/* gat D */
+ aa[36] = 11;/* gca A */
+ aa[37] = 11;/* gcc A */
+ aa[38] = 11;/* gcg A */
+ aa[39] = 11;/* gct A */
+ aa[40] = 12;/* gga G */
+ aa[41] = 12;/* ggc G */
+ aa[42] = 12;/* ggg G */
+ aa[43] = 12;/* ggt G */
+ aa[44] = 8;/* gta V */
+ aa[45] = 8;/* gtc V */
+ aa[46] = 8;/* gtg V */
+ aa[47] = 8;/* gtt V */
+ aa[48] = 0;/* taa * */
+ aa[49] = 3;/* tac Y */
+ aa[50] = 0;/* tag * */
+ aa[51] = 3;/* tat Y */
+ aa[52] = 14;/* tca S */
+ aa[53] = 14;/* tcc S */
+ aa[54] = 14;/* tcg S */
+ aa[55] = 14;/* tct S */
+if(code_mt)
+ aa[56] = 2;/* tga W */
+else
+ aa[56] = 0;/* tga * */
+ aa[57] = 10;/* tgc C */
+ aa[58] = 2;/* tgg W */
+ aa[59] = 10;/* tgt C */
+ aa[60] = 6;/* tta L */
+ aa[61] = 1;/* ttc F */
+ aa[62] = 6;/* ttg L */
+ aa[63] = 1;/* ttt F */
+
+/* added by M. Gouy */
+/* compute minrl = minimum of rl[1][1] and of rl[i][j] for 1 <= i < j <= 20 */
+minrl=rl[1][1];
+for(i=1; i<=20; i++)
+ for(j=i+1; j<=20; j++)
+ if(rl[i][j] < minrl ) minrl=rl[i][j];
+/* load rl[0][i] and rl[i][0] with minrl; index 0 corresponds to aa = stop */
+ for(i= 0; i<=20; i++) rl[0][i] = rl[i][0] = minrl;
+
+
+/****** for (i = 0; i < 63; i++) { I changed it to 64 JRL ********/
+
+ for(i = 0; i < 64; i++) {
+ for (j = i; j < 64; j++) {
+ for(ii=0;ii<3;ii++){
+ l[ii]=ti[ii]=tv[ii]=0;
+ }
+
+ /* decode the indices i and j back into codons cod1 and cod2 (base-4 digits 0..3 -> A, C, G, T) */
+ n1 = i / 16;
+ n2 = (i - 16 * n1) / 4;
+ n3 = i - 16 * n1 - 4 * n2;
+ cod1[0] = 'A';
+ if (n1 == 1)
+ cod1[0] = 'C';
+ if (n1 == 2)
+ cod1[0] = 'G';
+ if (n1 == 3)
+ cod1[0] = 'T';
+ cod1[1] = 'A';
+ if (n2 == 1)
+ cod1[1] = 'C';
+ if (n2 == 2)
+ cod1[1] = 'G';
+ if (n2 == 3)
+ cod1[1] = 'T';
+ cod1[2] = 'A';
+ if (n3 == 1)
+ cod1[2] = 'C';
+ if (n3 == 2)
+ cod1[2] = 'G';
+ if (n3 == 3)
+ cod1[2] = 'T';
+
+ n1 = j / 16;
+ n2 = (j - 16 * n1) / 4;
+ n3 = j - 16 * n1 - 4 * n2;
+ cod2[0] = 'A';
+ if (n1 == 1)
+ cod2[0] = 'C';
+ if (n1 == 2)
+ cod2[0] = 'G';
+ if (n1 == 3)
+ cod2[0] = 'T';
+ cod2[1] = 'A';
+ if (n2 == 1)
+ cod2[1] = 'C';
+ if (n2 == 2)
+ cod2[1] = 'G';
+ if (n2 == 3)
+ cod2[1] = 'T';
+ cod2[2] = 'A';
+ if (n3 == 1)
+ cod2[2] = 'C';
+ if (n3 == 2)
+ cod2[2] = 'G';
+ if (n3 == 3)
+ cod2[2] = 'T';
+
+
+
+ /* count the differing positions; pos[k] = 1 marks a position where the two codons differ */
+ nbdiff = 0;
+ pos[0] = pos[1] = pos[2] = 0;
+ if (cod1[0] != cod2[0]) {
+ nbdiff++;
+ pos[0] = 1;
+ }
+ if (cod1[1] != cod2[1]) {
+ nbdiff++;
+ pos[1] = 1;
+ }
+ if (cod1[2] != cod2[2]) {
+ nbdiff++;
+ pos[2] = 1;
+ }
+ if (nbdiff != 2)
+ for (jj = 0; jj < 3; jj++)
+ if (pos[jj] == 0) {
+ l[catsite(cod1[0], cod1[1], cod1[2], jj + 1)] += 0.5;
+ l[catsite(cod2[0], cod2[1], cod2[2], jj + 1)] += 0.5;
+ }
+ if (nbdiff == 1)
+ titv1(cod1, cod2, 1.0, ti, tv, l);
+ if (nbdiff == 2)
+ titv2(cod1, cod2, ti, tv, l, aa, rl, pos);
+ if (nbdiff == 3)
+ titv3(cod1, cod2, ti, tv, l, aa, rl);
+ /* store the results symmetrically: table[i][j] = table[j][i] */
+ *(tl0[i]+j)=*(tl0[j]+i)=l[0];
+ *(tl1[i]+j)=*(tl1[j]+i)=l[1];
+ *(tl2[i]+j)=*(tl2[j]+i)=l[2];
+ *(tti0[i]+j)=*(tti0[j]+i)=ti[0];
+ *(tti1[i]+j)=*(tti1[j]+i)=ti[1];
+ *(tti2[i]+j)=*(tti2[j]+i)=ti[2];
+ *(ttv0[i]+j)=*(ttv0[j]+i)=tv[0];
+ *(ttv1[i]+j)=*(ttv1[j]+i)=tv[1];
+ *(ttv2[i]+j)=*(ttv2[j]+i)=tv[2];
+
+ }
+ }
+
+return;
+
+}
+
+
+void reresh(char** seq, int nbseq, int option){
+/* Compacts the nbseq sequences in place by dropping alignment columns, then pads the freed tail with '\0'. */
+/* If option == 0, every column with at least one gap ('-') is removed; */
+/* otherwise only the columns in which every sequence has a gap are removed. */
+/* The length is taken from seq[0]; all sequences are assumed to have that length (aligned input) - TODO confirm with callers. */
+ int lgseq, l, drapeau, i, j, k;
+ char **seqref;
+ if (nbseq < 1) return; /* nothing to compact; also keeps seq[0] below in bounds */
+ seqref = (char **) R_alloc(nbseq, sizeof(char *));
+
+ lgseq = strlen(seq[0]); /* was seq[1], which is out of bounds when nbseq == 1 */
+
+ for(i = 0 ; i < nbseq ; i++){
+ seqref[i] = (char*) R_alloc(lgseq + 1, sizeof(char));
+ }
+
+ /* l is the index of the last column kept so far (-1 = none yet) */
+ l=-1;
+ if (option==0){
+ for(i=0;i<lgseq;i++){
+ drapeau=0;
+ for(j=0;j<nbseq;j++){
+ if (*(seq[j]+i)=='-') drapeau=1;
+ }
+ if (drapeau==0){
+ l++;
+ for(k=0;k<nbseq;k++) *(seqref[k]+l)=*(seq[k]+i);
+ }
+ }
+ }
+ else{
+ for(i=0;i<lgseq;i++){
+ drapeau=0;
+ for(j=0;j<nbseq;j++){
+ if (*(seq[j]+i)!='-') {
+ drapeau=1;
+ break;
+ }
+ }
+ if (drapeau==1){
+ l++;
+ for(k=0;k<nbseq;k++) *(seqref[k]+l)=*(seq[k]+i);
+ }
+ }
+ }
+ for(i=0;i<nbseq;i++){
+ for (j=l+1;j<lgseq;j++) {
+ *(seqref[i]+j)='\0';
+ }
+ }
+ for (i=0;i<nbseq;i++) {
+ for (j=0;j<lgseq;j++){
+ *(seq[i]+j)=*(seqref[i]+j);
+ }
+ }
+}
+
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..bfc3fc5
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,72 @@
+#include <Rinternals.h>
+#include <R.h>
+#include <Rdefines.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <math.h>
+
+
+
+
+/*##################################################*/
+/*# Converts a String into a vector of characters #*/
+/*##################################################*/
+
+
+SEXP s2c(SEXP seq){
+ char *string;
+ int lseq, i;
+ char mot[2] = {'\0', '\0'};
+ /* mot is a one-character, NUL-terminated scratch string handed to mkChar() for each position */
+ SEXP chaine;
+ /* Only element 0 of seq is read; chaine receives one single-character string per char of it. */
+ string = (char *) CHAR(STRING_ELT(seq, 0));
+
+ lseq = strlen(string);
+
+ PROTECT(chaine = NEW_CHARACTER(lseq));
+
+ for(i = 0 ; i < lseq ; i++){
+ mot[0] = string[i];
+ SET_STRING_ELT(chaine, i, mkChar(mot));
+ }
+
+ UNPROTECT(1);
+ return(chaine);
+}
+
+
+/*#####################################################*/
+/*# Tester si une séquence est protéique ou nucléique #*/
+/*#####################################################*/
+
+
+
+
+SEXP is_a_protein_seq(SEXP sequence)
+/* returns, as a length-1 numeric, the fraction of the non-gap characters of element 0 of sequence that are A, C, G, T or U (case-insensitive) */
+{
+ /* NOTE(review): no TRUE/FALSE is computed here; the "less than 80% ACGTU" protein decision is presumably made by the caller - confirm. */
+ SEXP res;
+ char *seq;
+ static char dna[]="ACGTU";
+ int total=0, length=0;
+
+ seq = (char *) CHAR(STRING_ELT(sequence,0));
+
+ while(*seq != 0) {
+ if(*seq != '-') {
+ if( strchr(dna, toupper((unsigned char) *seq)) != NULL ) total++; /* cast: toupper() is undefined for negative char values */
+ length++;
+ }
+ seq++;
+ }
+ /* NOTE(review): length is 0 for an empty or all-gap string, so the division below is 0/0 in that case. */
+ PROTECT(res=NEW_NUMERIC(1));
+ REAL(res)[0]=(float)(total) / length ;
+
+ UNPROTECT(1);
+ return ( res );
+}
diff --git a/src/zsockr.c b/src/zsockr.c
new file mode 100644
index 0000000..6129a88
--- /dev/null
+++ b/src/zsockr.c
@@ -0,0 +1,169 @@
+/* functions to handle zlib-compressed data read from socket
+*/
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif
+#endif
+#ifndef WIN32
+#include "zlib.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef WIN32
+#include <winsock.h>
+#else
+#include <sys/select.h>
+#endif
+
+
+/* included functions */
+void *prepare_sock_gz_r(int sockr);
+int z_getc_R(void *v);
+char *z_gets(void *v, char *line, size_t len);
+char *z_read_sock(void *v);
+int close_sock_gz_r(void *v);
+
+
+/* sock_gz_r: inflate state for one socket. z_getc_R() reads raw bytes from fd into z_buffer (ZBSIZE bytes) and inflates them into text_buffer; pos..endbuf bound the inflated bytes not yet handed out. */
+#define ZBSIZE 100000
+typedef struct {
+ z_stream stream;
+ char z_buffer[ZBSIZE]; /* compressed input buffer */
+ char text_buffer[4 * ZBSIZE]; /* decompressed buffer */
+ char *pos, *endbuf;
+#ifdef WIN32
+ SOCKET fd;
+#else
+ int fd;
+#endif
+ } sock_gz_r;
+
+
+/* prepare_sock_gz_r(): reset the single static sock_gz_r state for socket sockr and call inflateInit() on it; returns the state as void *, or NULL if inflateInit() did not return Z_OK. Every call reuses the same static object. */
+void *prepare_sock_gz_r(int sockr)
+{
+int err;
+sock_gz_r *big;
+static sock_gz_r s_big;
+
+big = &s_big;
+if(big == NULL) return NULL; /* cannot trigger: big is the address of the static s_big */
+big->stream.next_in = Z_NULL;
+big->stream.avail_in = 0;
+big->stream.avail_out = 0;
+big->stream.zalloc = Z_NULL;
+big->stream.zfree = Z_NULL;
+big->stream.opaque = NULL;
+big->pos = big->text_buffer;
+big->endbuf = big->pos;
+#ifdef WIN32
+big->fd = (SOCKET)sockr;
+#else
+big->fd = sockr;
+#endif
+err = inflateInit(&big->stream);
+return err == Z_OK ? (void *)big : NULL;
+}
+
+/* z_getc_R(): return the next inflated byte as a value 0..255 from the state v made by prepare_sock_gz_r(), refilling from the socket when the buffer is empty; returns EOF when select()/read() fails or inflate() produces no further output. */
+int z_getc_R(void *v)
+{
+int q, lu;
+sock_gz_r *big = (sock_gz_r *)v;
+z_streamp zs;
+#ifndef WIN32
+int err;
+fd_set readfds;
+#endif
+
+if(big->pos < big->endbuf) {
+ return (unsigned char) *(big->pos++); /* as getc(): a 0xFF byte must not compare equal to EOF where char is signed */
+ }
+zs = &(big->stream);
+zs->next_out = (Bytef *)big->text_buffer;
+zs->avail_out = sizeof(big->text_buffer);
+big->pos = (char *)zs->next_out;
+do {
+ if(zs->avail_in == 0) {
+#ifdef WIN32
+ do
+ lu = recv( big->fd , big->z_buffer, ZBSIZE, 0 );
+ while (lu <=0);
+#else
+ FD_ZERO(&readfds);
+ FD_SET(big->fd, &readfds);
+ err = select(big->fd + 1, &readfds, NULL, NULL, NULL);
+ if(err > 0 ) {
+ lu = read( big->fd , big->z_buffer, ZBSIZE );
+ }
+ else lu = -1;
+#endif
+ if(lu == -1) return EOF;
+ zs->next_in = (Bytef *)big->z_buffer;
+ zs->avail_in = lu;
+ }
+ q = inflate(zs, Z_NO_FLUSH);
+ if(q == Z_STREAM_END) break;
+ if(q != Z_OK) {
+ break;
+ }
+ }
+while ( (char *)zs->next_out == big->pos);
+big->endbuf = (char *)zs->next_out;
+if(big->pos < big->endbuf) return (unsigned char) *(big->pos++);
+else
+ return EOF;
+}
+
+/* z_gets(): fgets-like read through z_getc_R(): stores at most len-1 characters in line, stops after a '\n' (which is kept) and NUL-terminates; returns line, or NULL if EOF occurs before any character was read. */
+char *z_gets(void *v, char *line, size_t len)
+{
+int c;
+char *p;
+
+p = line;
+while(len > 1) {
+ c = z_getc_R( v );
+ if(c == EOF) {
+ if(p == line) return NULL;
+ break;
+ }
+ *(p++) = c;
+ if(c == '\n') break;
+ len--;
+ }
+*p = 0; /* NOTE(review): this byte is written even when len == 0 */
+return line;
+}
+
+/* z_read_sock(): read one line (at most 499 characters per call) into a static buffer via z_gets(), strip a trailing '\n', and return the buffer, or NULL when z_gets() returns NULL; the buffer is overwritten by the next call. */
+char *z_read_sock(void *v)
+{
+static char line[500];
+char *p;
+int l;
+
+p = z_gets(v, line, sizeof(line));
+if(p == NULL) return NULL;
+l = strlen(line);
+if(l > 0 && line[l-1] == '\n') line[l-1] = 0;
+return line;
+}
+
+/* close_sock_gz_r(): call inflateEnd() on the stream held in the state v and return its result; the socket descriptor itself is not touched here. */
+int close_sock_gz_r(void *v)
+{
+sock_gz_r *big = (sock_gz_r *)v;
+int val;
+
+val = inflateEnd(&(big->stream));
+return val;
+}
+#else
+void *prepare_sock_gz_r(int sockr) { /* stub compiled when WIN32 is defined: always returns a null pointer */
+return 0;
+}
+#endif
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-seqinr.git
More information about the debian-med-commit
mailing list